From 2fbb3152165bdcb84f16b795863476c1816428a2 Mon Sep 17 00:00:00 2001
From: David Degazio <d_degazio@apple.com>
Date: Mon, 24 Apr 2023 08:58:19 -0700
Subject: [PATCH] Use bump ranges instead of free list in JSC allocator
 https://bugs.webkit.org/show_bug.cgi?id=255798
 rdar://108377867

Reviewed by Yusuke Suzuki and Mark Lam.

Replaces the JSC free list allocator with a list of free intervals. Each
interval represents a contiguous block of dead cells, and within each
interval we can bump-allocate. This should mean we bump-allocate more
often, and it should also speed up sweeping, since we install one free
cell per interval instead of one per dead cell. The benefit is largest
for workloads that rarely manage to empty blocks completely, and so
rarely hit the old bump path today.

* Source/JavaScriptCore/heap/FreeList.cpp:
(JSC::FreeList::clear):
(JSC::FreeList::initialize):
(JSC::FreeList::contains const):
(JSC::FreeList::dump const):
(JSC::FreeList::initializeList): Deleted.
(JSC::FreeList::initializeBump): Deleted.
* Source/JavaScriptCore/heap/FreeList.h:
(JSC::FreeCell::scramble):
(JSC::FreeCell::descramble):
(JSC::FreeCell::makeLast):
(JSC::FreeCell::setNext):
(JSC::FreeCell::decode):
(JSC::FreeCell::offsetOfScrambledBits):
(JSC::FreeList::allocationWillFail const):
(JSC::FreeList::isSentinel):
(JSC::FreeList::offsetOfHead):
(JSC::FreeList::offsetOfPayloadStart):
(JSC::FreeList::offsetOfPayloadEnd):
(JSC::FreeList::head const):
(JSC::FreeCell::next const): Deleted.
(JSC::FreeCell::offsetOfScrambledNext): Deleted.
(JSC::FreeList::offsetOfScrambledHead): Deleted.
(JSC::FreeList::offsetOfRemaining): Deleted.
* Source/JavaScriptCore/heap/FreeListInlines.h:
(JSC::FreeList::allocate):
(JSC::FreeList::forEach const):
* Source/JavaScriptCore/heap/MarkedBlockInlines.h:
(JSC::MarkedBlock::Handle::specializedSweep):
* Source/JavaScriptCore/jit/AssemblyHelpers.cpp:
(JSC::AssemblyHelpers::jitAssertTagsInPlace):
(JSC::AssemblyHelpers::emitExceptionCheck):
(JSC::AssemblyHelpers::emitNonPatchableExceptionCheck):
(JSC::AssemblyHelpers::loadProperty):
(JSC::AssemblyHelpers::storeProperty):
(JSC::AssemblyHelpers::emitAllocateWithNonNullAllocator):
(JSC::AssemblyHelpers::emitAllocateVariableSized):
(JSC::AssemblyHelpers::restoreCalleeSavesFromEntryFrameCalleeSavesBuffer):
(JSC::AssemblyHelpers::emitRestoreCalleeSavesFor):
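The shape of the new free list, as a simplified sketch (illustrative types
only, not the literal FreeCell/FreeList below; the real header also carries
a preservedBitsForCrashAnalysis word, and its bits are XOR-scrambled with a
per-sweep secret):

    // One header per contiguous run of dead cells, stored in the run's first cell.
    struct IntervalHeader {
        uint64_t scrambledBits; // (lengthInBytes << 32 | offsetToNext) ^ secret
    };

The allocator caches [intervalStart, intervalEnd) for the current run and
bump-allocates cellSize bytes at a time; only when the run is exhausted does
it decode the next header, and a set low bit in the next-interval pointer
serves as the end-of-list sentinel.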
Canonical link: https://commits.webkit.org/263313@main
---
 .../assembler/MacroAssemblerARM64.h           |   7 +
 Source/JavaScriptCore/heap/FreeList.cpp       |  47 +++---
 Source/JavaScriptCore/heap/FreeList.h         |  67 +++++---
 Source/JavaScriptCore/heap/FreeListInlines.h  |  48 +++---
 .../JavaScriptCore/heap/MarkedBlockInlines.h  |  62 +++++--
 Source/JavaScriptCore/jit/AssemblyHelpers.cpp | 159 +++++++++++++-----
 6 files changed, 259 insertions(+), 131 deletions(-)

diff --git a/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h b/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
index 32e9010b5d56..770e354f96b6 100644
--- a/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
+++ b/Source/JavaScriptCore/assembler/MacroAssemblerARM64.h
@@ -360,6 +360,13 @@ class MacroAssemblerARM64 : public AbstractMacroAssembler<Assembler> {
         m_assembler.add<64>(dest, dest, dataTempRegister);
     }
 
+    void add64(RegisterID src, Address dest)
+    {
+        load64(dest, getCachedDataTempRegisterIDAndInvalidate());
+        m_assembler.add<64>(dataTempRegister, dataTempRegister, src);
+        store64(dataTempRegister, dest);
+    }
+
     void add64(AbsoluteAddress src, RegisterID dest)
     {
         load64(src.m_ptr, getCachedDataTempRegisterIDAndInvalidate());
diff --git a/Source/JavaScriptCore/heap/FreeList.cpp b/Source/JavaScriptCore/heap/FreeList.cpp
index 80c5872d060a..7dd282b38c30 100644
--- a/Source/JavaScriptCore/heap/FreeList.cpp
+++ b/Source/JavaScriptCore/heap/FreeList.cpp
@@ -39,45 +39,38 @@ FreeList::~FreeList()
 
 void FreeList::clear()
 {
-    m_scrambledHead = 0;
+    m_intervalStart = nullptr;
+    m_intervalEnd = nullptr;
+    m_nextInterval = bitwise_cast<FreeCell*>(static_cast<uintptr_t>(1));
     m_secret = 0;
-    m_payloadEnd = nullptr;
-    m_remaining = 0;
     m_originalSize = 0;
 }
 
-void FreeList::initializeList(FreeCell* head, uintptr_t secret, unsigned bytes)
+void FreeList::initialize(FreeCell* start, uint64_t secret, unsigned bytes)
 {
-    // It's *slightly* more optimal to use a scrambled head. It saves a register on the fast path.
-    m_scrambledHead = FreeCell::scramble(head, secret);
+    if (UNLIKELY(!start)) {
+        clear();
+        return;
+    }
     m_secret = secret;
-    m_payloadEnd = nullptr;
-    m_remaining = 0;
+    m_nextInterval = start;
+    FreeCell::advance(m_secret, m_nextInterval, m_intervalStart, m_intervalEnd);
     m_originalSize = bytes;
 }
 
-void FreeList::initializeBump(char* payloadEnd, unsigned remaining)
-{
-    m_scrambledHead = 0;
-    m_secret = 0;
-    m_payloadEnd = payloadEnd;
-    m_remaining = remaining;
-    m_originalSize = remaining;
-}
-
 bool FreeList::contains(HeapCell* target) const
 {
-    if (m_remaining) {
-        const void* start = (m_payloadEnd - m_remaining);
-        const void* end = m_payloadEnd;
-        return (start <= target) && (target < end);
-    }
+    char* targetPtr = bitwise_cast<char*>(target);
+    if (m_intervalStart <= targetPtr && targetPtr < m_intervalEnd)
+        return true;
 
-    FreeCell* candidate = head();
-    while (candidate) {
-        if (bitwise_cast<HeapCell*>(candidate) == target)
+    FreeCell* candidate = nextInterval();
+    while (!isSentinel(candidate)) {
+        char* start;
+        char* end;
+        FreeCell::advance(m_secret, candidate, start, end);
+        if (start <= targetPtr && targetPtr < end)
             return true;
-        candidate = candidate->next(m_secret);
     }
 
     return false;
@@ -85,7 +78,7 @@ bool FreeList::contains(HeapCell* target) const
 
 void FreeList::dump(PrintStream& out) const
 {
-    out.print("{head = ", RawPointer(head()), ", secret = ", m_secret, ", payloadEnd = ", RawPointer(m_payloadEnd), ", remaining = ", m_remaining, ", originalSize = ", m_originalSize, "}");
+    out.print("{nextInterval = ", RawPointer(nextInterval()), ", secret = ", m_secret, ", intervalStart = ", RawPointer(m_intervalStart), ", intervalEnd = ", RawPointer(m_intervalEnd), ", originalSize = ", m_originalSize, "}");
 }
 
 } // namespace JSC
diff --git a/Source/JavaScriptCore/heap/FreeList.h b/Source/JavaScriptCore/heap/FreeList.h
index 9d6dd4afb13a..7f23ca22375c 100644
--- a/Source/JavaScriptCore/heap/FreeList.h
+++ b/Source/JavaScriptCore/heap/FreeList.h
@@ -27,36 +27,53 @@
 
 #include <wtf/Noncopyable.h>
 #include <wtf/PrintStream.h>
+#include <wtf/MathExtras.h>
 
 namespace JSC {
 
 class HeapCell;
 
 struct FreeCell {
-    static uintptr_t scramble(FreeCell* cell, uintptr_t secret)
+    static ALWAYS_INLINE uint64_t scramble(int32_t offsetToNext, uint32_t lengthInBytes, uint64_t secret)
     {
-        return bitwise_cast<uintptr_t>(cell) ^ secret;
+        ASSERT(static_cast<uint64_t>(lengthInBytes) << 32 | offsetToNext);
+        return (static_cast<uint64_t>(lengthInBytes) << 32 | offsetToNext) ^ secret;
     }
-
-    static FreeCell* descramble(uintptr_t cell, uintptr_t secret)
+
+    static ALWAYS_INLINE std::tuple<int32_t, uint32_t> descramble(uint64_t scrambledBits, uint64_t secret)
     {
-        return bitwise_cast<FreeCell*>(cell ^ secret);
+        static_assert(WTF::isPowerOfTwo(sizeof(FreeCell))); // Make sure this division isn't super costly.
+        uint64_t descrambledBits = scrambledBits ^ secret;
+        return { static_cast<int32_t>(static_cast<uint32_t>(descrambledBits)), static_cast<uint32_t>(descrambledBits >> 32u) };
     }
-
-    void setNext(FreeCell* next, uintptr_t secret)
+
+    ALWAYS_INLINE void makeLast(uint32_t lengthInBytes, uint64_t secret)
     {
-        scrambledNext = scramble(next, secret);
+        scrambledBits = scramble(1, lengthInBytes, secret); // We use a set LSB to indicate a sentinel pointer.
     }
-
-    FreeCell* next(uintptr_t secret) const
+
+    ALWAYS_INLINE void setNext(FreeCell* next, uint32_t lengthInBytes, uint64_t secret)
     {
-        return descramble(scrambledNext, secret);
+        scrambledBits = scramble((next - this) * sizeof(FreeCell), lengthInBytes, secret);
     }
-
-    static ptrdiff_t offsetOfScrambledNext() { return OBJECT_OFFSETOF(FreeCell, scrambledNext); }
+
+    ALWAYS_INLINE std::tuple<int32_t, uint32_t> decode(uint64_t secret)
+    {
+        return descramble(scrambledBits, secret);
+    }
+
+    static ALWAYS_INLINE void advance(uint64_t secret, FreeCell*& interval, char*& intervalStart, char*& intervalEnd)
+    {
+        auto [offsetToNext, lengthInBytes] = interval->decode(secret);
+        intervalStart = bitwise_cast<char*>(interval);
+        intervalEnd = intervalStart + lengthInBytes;
+        interval = bitwise_cast<FreeCell*>(intervalStart + offsetToNext);
+    }
+
+    static ALWAYS_INLINE ptrdiff_t offsetOfScrambledBits() { return OBJECT_OFFSETOF(FreeCell, scrambledBits); }
 
     uint64_t preservedBitsForCrashAnalysis;
-    uintptr_t scrambledNext;
+    uint64_t scrambledBits;
 };
 
 class FreeList {
@@ -66,10 +83,9 @@ class FreeList {
 
     void clear();
 
-    JS_EXPORT_PRIVATE void initializeList(FreeCell* head, uintptr_t secret, unsigned bytes);
-    JS_EXPORT_PRIVATE void initializeBump(char* payloadEnd, unsigned remaining);
+    JS_EXPORT_PRIVATE void initialize(FreeCell* head, uint64_t secret, unsigned bytes);
 
-    bool allocationWillFail() const { return !head() && !m_remaining; }
+    bool allocationWillFail() const { return m_intervalStart >= m_intervalEnd && isSentinel(nextInterval()); }
     bool allocationWillSucceed() const { return !allocationWillFail(); }
 
     template<typename Func>
@@ -82,10 +98,11 @@ class FreeList {
 
     unsigned originalSize() const { return m_originalSize; }
 
-    static ptrdiff_t offsetOfScrambledHead() { return OBJECT_OFFSETOF(FreeList, m_scrambledHead); }
+    static bool isSentinel(FreeCell* cell) { return bitwise_cast<uintptr_t>(cell) & 1; }
+    static ptrdiff_t offsetOfNextInterval() { return OBJECT_OFFSETOF(FreeList, m_nextInterval); }
     static ptrdiff_t offsetOfSecret() { return OBJECT_OFFSETOF(FreeList, m_secret); }
-    static ptrdiff_t offsetOfPayloadEnd() { return OBJECT_OFFSETOF(FreeList, m_payloadEnd); }
-    static ptrdiff_t offsetOfRemaining() { return OBJECT_OFFSETOF(FreeList, m_remaining); }
+    static ptrdiff_t offsetOfIntervalStart() { return OBJECT_OFFSETOF(FreeList, m_intervalStart); }
+    static ptrdiff_t offsetOfIntervalEnd() { return OBJECT_OFFSETOF(FreeList, m_intervalEnd); }
     static ptrdiff_t offsetOfOriginalSize() { return OBJECT_OFFSETOF(FreeList, m_originalSize); }
     static ptrdiff_t offsetOfCellSize() { return OBJECT_OFFSETOF(FreeList, m_cellSize); }
 
@@ -94,12 +111,12 @@ class FreeList {
     unsigned cellSize() const { return m_cellSize; }
 
 private:
-    FreeCell* head() const { return FreeCell::descramble(m_scrambledHead, m_secret); }
+    FreeCell* nextInterval() const { return m_nextInterval; }
 
-    uintptr_t m_scrambledHead { 0 };
-    uintptr_t m_secret { 0 };
-    char* m_payloadEnd { nullptr };
-    unsigned m_remaining { 0 };
+    char* m_intervalStart { nullptr };
+    char* m_intervalEnd { nullptr };
+    FreeCell* m_nextInterval { bitwise_cast<FreeCell*>(static_cast<uintptr_t>(1)) };
+    uint64_t m_secret { 0 };
     unsigned m_originalSize { 0 };
     unsigned m_cellSize { 0 };
 };
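To make the header encoding above concrete: take a 64-byte run of dead cells
whose next run begins 512 bytes later. setNext() packs (64 << 32 | 512) and
XORs it with the secret; decode() XORs the secret back out and splits the
halves. A self-contained sketch with hypothetical values (not JSC code):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        uint64_t secret = 0x9e3779b97f4a7c15ull; // hypothetical per-sweep secret
        int32_t offsetToNext = 512;  // byte offset from this header to the next one
        uint32_t lengthInBytes = 64; // size of this run of dead cells

        // Pack and scramble, mirroring FreeCell::scramble() above.
        uint64_t scrambled = (static_cast<uint64_t>(lengthInBytes) << 32
            | static_cast<uint32_t>(offsetToNext)) ^ secret;

        // Descramble and split, mirroring FreeCell::descramble() above.
        uint64_t bits = scrambled ^ secret;
        assert(static_cast<int32_t>(static_cast<uint32_t>(bits)) == offsetToNext);
        assert(static_cast<uint32_t>(bits >> 32) == lengthInBytes);
        return 0;
    }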
diff --git a/Source/JavaScriptCore/heap/FreeListInlines.h b/Source/JavaScriptCore/heap/FreeListInlines.h
index 84614f8fdc3d..88c289f286be 100644
--- a/Source/JavaScriptCore/heap/FreeListInlines.h
+++ b/Source/JavaScriptCore/heap/FreeListInlines.h
@@ -33,36 +33,44 @@ namespace JSC {
 
 template<typename Func>
 ALWAYS_INLINE HeapCell* FreeList::allocate(const Func& slowPath)
 {
-    unsigned remaining = m_remaining;
-    if (remaining) {
-        unsigned cellSize = m_cellSize;
-        remaining -= cellSize;
-        m_remaining = remaining;
-        return bitwise_cast<HeapCell*>(m_payloadEnd - remaining - cellSize);
+    unsigned cellSize = m_cellSize;
+    if (LIKELY(m_intervalStart < m_intervalEnd)) {
+        char* result = m_intervalStart;
+        m_intervalStart += cellSize;
+        return bitwise_cast<HeapCell*>(result);
     }
 
-    FreeCell* result = head();
-    if (UNLIKELY(!result))
+    FreeCell* cell = nextInterval();
+    if (UNLIKELY(isSentinel(cell)))
         return slowPath();
+
+    FreeCell::advance(m_secret, m_nextInterval, m_intervalStart, m_intervalEnd);
 
-    m_scrambledHead = result->scrambledNext;
+    // It's an invariant of our allocator that we don't create empty intervals, so there
+    // should always be enough space remaining to allocate a cell.
+    char* result = m_intervalStart;
+    m_intervalStart += cellSize;
     return bitwise_cast<HeapCell*>(result);
 }
 
 template<typename Func>
 void FreeList::forEach(const Func& func) const
 {
-    if (m_remaining) {
-        for (unsigned remaining = m_remaining; remaining; remaining -= m_cellSize)
-            func(bitwise_cast<HeapCell*>(m_payloadEnd - remaining));
-    } else {
-        for (FreeCell* cell = head(); cell;) {
-            // We can use this to overwrite free objects before destroying the free list. So, we need
-            // to get next before proceeding further.
-            FreeCell* next = cell->next(m_secret);
-            func(bitwise_cast<HeapCell*>(cell));
-            cell = next;
-        }
+    FreeCell* cell = nextInterval();
+    char* intervalStart = m_intervalStart;
+    char* intervalEnd = m_intervalEnd;
+    ASSERT(intervalEnd - intervalStart < (ptrdiff_t)(16 * KB));
+
+    while (true) {
+        for (; intervalStart < intervalEnd; intervalStart += m_cellSize)
+            func(bitwise_cast<HeapCell*>(intervalStart));
+
+        // If we explore the whole interval and the cell is the sentinel value, though, we should
+        // immediately exit so we don't decode anything out of bounds.
+        if (isSentinel(cell))
+            break;
+
+        FreeCell::advance(m_secret, cell, intervalStart, intervalEnd);
     }
 }
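The allocate() fast path above is a pure pointer bump; only when the current
interval is exhausted does it pop and decode the next header. A rough
standalone model of that control flow (hypothetical simplified types, no
scrambling, not the JSC classes):

    #include <cstddef>
    #include <utility>
    #include <vector>

    struct IntervalAllocator {
        std::vector<std::pair<char*, char*>> intervals; // remaining [start, end) runs
        char* start = nullptr; // bounds of the current run
        char* end = nullptr;
        size_t cellSize = 16;

        void* allocate()
        {
            if (start >= end) { // current run exhausted
                if (intervals.empty()) // stands in for the sentinel check
                    return nullptr;    // the real allocator calls slowPath() here
                start = intervals.back().first; // stands in for FreeCell::advance()
                end = intervals.back().second;
                intervals.pop_back();
            }
            char* result = start; // bump within the current run
            start += cellSize;
            return result;
        }
    };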
diff --git a/Source/JavaScriptCore/heap/MarkedBlockInlines.h b/Source/JavaScriptCore/heap/MarkedBlockInlines.h
index 463091f19597..9b4cac473089 100644
--- a/Source/JavaScriptCore/heap/MarkedBlockInlines.h
+++ b/Source/JavaScriptCore/heap/MarkedBlockInlines.h
@@ -254,6 +254,7 @@ void MarkedBlock::Handle::specializedSweep(FreeList* freeList, MarkedBlock::Hand
 
     unsigned cellSize = this->cellSize();
     VM& vm = this->vm();
+    uint64_t secret = vm.heapRandom().getUint64();
     auto destroy = [&] (void* cell) {
         JSCell* jsCell = static_cast<JSCell*>(cell);
         if (!jsCell->isZapped()) {
@@ -293,9 +294,11 @@ void MarkedBlock::Handle::specializedSweep(FreeList* freeList, MarkedBlock::Hand
                 destroy(cell);
         }
         if (sweepMode == SweepToFreeList) {
-            if (scribbleMode == Scribble)
+            if (UNLIKELY(scribbleMode == Scribble))
                 scribble(payloadBegin, payloadEnd - payloadBegin);
-            freeList->initializeBump(payloadEnd, payloadEnd - payloadBegin);
+            FreeCell* interval = reinterpret_cast_ptr<FreeCell*>(payloadBegin);
+            interval->makeLast(payloadEnd - payloadBegin, secret);
+            freeList->initialize(interval, secret, payloadEnd - payloadBegin);
         }
         if (false)
             dataLog("Quickly swept block ", RawPointer(this), " with cell size ", cellSize, " and attributes ", m_attributes, ": ", pointerDump(freeList), "\n");
@@ -305,10 +308,11 @@ void MarkedBlock::Handle::specializedSweep(FreeList* freeList, MarkedBlock::Hand
     // This produces a free list that is ordered in reverse through the block.
     // This is fine, since the allocation code makes no assumptions about the
     // order of the free list.
-    FreeCell* head = nullptr;
-    size_t count = 0;
-    uintptr_t secret = static_cast<uintptr_t>(vm.heapRandom().getUint64());
+    size_t freedBytes = 0;
     bool isEmpty = true;
+    FreeCell* head = nullptr;
+    size_t currentInterval = 0;
+    size_t previousDeadCell = 0;
 
     // We try to allocate the deadCells vector entirely on the stack if possible.
     // Otherwise, we use the maximum permitted space (currently 8kB) to store as
@@ -323,21 +327,46 @@ void MarkedBlock::Handle::specializedSweep(FreeList* freeList, MarkedBlock::Hand
     auto handleDeadCell = [&] (size_t i) {
         HeapCell* cell = reinterpret_cast_ptr<HeapCell*>(&block.atoms()[i]);
-
         if (destructionMode != BlockHasNoDestructors)
             destroy(cell);
-
         if (sweepMode == SweepToFreeList) {
-            FreeCell* freeCell = reinterpret_cast_ptr<FreeCell*>(cell);
-            if (scribbleMode == Scribble)
-                scribble(freeCell, cellSize);
-            freeCell->setNext(head, secret);
-            head = freeCell;
-            ++count;
+            if (UNLIKELY(scribbleMode == Scribble))
+                scribble(cell, cellSize);
+
+            // The following check passing implies there was at least one live cell
+            // between us and the last dead cell, meaning that the previous dead
+            // cell is the start of its interval.
+            if (i + m_atomsPerCell < previousDeadCell) {
+                size_t intervalLength = currentInterval * atomSize;
+                FreeCell* cell = reinterpret_cast_ptr<FreeCell*>(&block.atoms()[previousDeadCell]);
+
+                if (LIKELY(head))
+                    cell->setNext(head, intervalLength, secret);
+                else
+                    cell->makeLast(intervalLength, secret);
+                freedBytes += intervalLength;
+                head = cell;
+                currentInterval = 0;
+            }
+            currentInterval += m_atomsPerCell;
+            previousDeadCell = i;
         }
     };
 
+    auto checkForFinalInterval = [&] () {
+        if (sweepMode == SweepToFreeList && currentInterval) {
+            size_t intervalLength = currentInterval * atomSize;
+            FreeCell* cell = reinterpret_cast_ptr<FreeCell*>(&block.atoms()[previousDeadCell]);
+
+            if (LIKELY(head))
+                cell->setNext(head, intervalLength, secret);
+            else
+                cell->makeLast(intervalLength, secret);
+            freedBytes += intervalLength;
+            head = cell;
+        }
+    };
+
-    for (size_t i = m_startAtom; i < endAtom; i += m_atomsPerCell) {
+    for (int i = endAtom - m_atomsPerCell; i >= static_cast<int>(m_startAtom); i -= m_atomsPerCell) {
         if (emptyMode == NotEmpty
             && ((marksMode == MarksNotStale && header.m_marks.get(i))
                 || (newlyAllocatedMode == HasNewlyAllocated && header.m_newlyAllocated.get(i)))) {
@@ -353,6 +382,8 @@ void MarkedBlock::Handle::specializedSweep(FreeList* freeList, MarkedBlock::Hand
         } else
             handleDeadCell(i);
     }
+    if (destructionMode != BlockHasDestructorsAndCollectorIsRunning)
+        checkForFinalInterval(); // We need this to handle the first interval in the block, since it has no dead cells before it.
 
     // We only want to discard the newlyAllocated bits if we're creating a FreeList,
     // otherwise we would lose information on what's currently alive.
@@ -365,10 +396,11 @@ void MarkedBlock::Handle::specializedSweep(FreeList* freeList, MarkedBlock::Hand
     if (destructionMode == BlockHasDestructorsAndCollectorIsRunning) {
         for (size_t i : deadCells)
             handleDeadCell(i);
+        checkForFinalInterval();
     }
 
     if (sweepMode == SweepToFreeList) {
-        freeList->initializeList(head, secret, count * cellSize);
+        freeList->initialize(head, secret, freedBytes);
         setIsFreeListed();
     } else if (isEmpty)
         m_directory->setIsEmpty(NoLockingNecessary, this, true);
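specializedSweep() above now walks the block from high addresses to low,
coalescing adjacent dead cells and emitting one interval header per run,
either when a live cell terminates the run or, via checkForFinalInterval(),
when the walk reaches the start of the block. A compact model of just the
coalescing step (hypothetical standalone code, not the JSC sweep):

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Given per-cell liveness for a block, produce (firstCell, cellCount) runs
    // of dead cells, walking in reverse like handleDeadCell() does.
    std::vector<std::pair<size_t, size_t>> buildIntervals(const std::vector<bool>& live)
    {
        std::vector<std::pair<size_t, size_t>> runs;
        size_t runLength = 0;
        for (size_t i = live.size(); i--;) {
            if (!live[i]) { // dead cell: extend the current run downward
                ++runLength;
                continue;
            }
            if (runLength) { // live cell: the run that starts just above it is complete
                runs.push_back({ i + 1, runLength });
                runLength = 0;
            }
        }
        if (runLength) // the block's first interval has no live cell before it
            runs.push_back({ 0, runLength });
        return runs;
    }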
diff --git a/Source/JavaScriptCore/jit/AssemblyHelpers.cpp b/Source/JavaScriptCore/jit/AssemblyHelpers.cpp
index 97218f0b9124..d2104a64b1c6 100644
--- a/Source/JavaScriptCore/jit/AssemblyHelpers.cpp
+++ b/Source/JavaScriptCore/jit/AssemblyHelpers.cpp
@@ -20,7 +20,7 @@
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include "config.h"
@@ -78,7 +78,7 @@ void AssemblyHelpers::decrementSuperSamplerCount()
 {
     sub32(TrustedImm32(1), AbsoluteAddress(bitwise_cast<void*>(&g_superSamplerCount)));
 }
-    
+
 void AssemblyHelpers::purifyNaN(FPRReg fpr)
 {
     MacroAssembler::Jump notNaN = branchIfNotNaN(fpr);
@@ -152,7 +152,7 @@ void AssemblyHelpers::jitAssertTagsInPlace()
     abortWithReason(AHNumberTagNotInPlace);
     breakpoint();
     ok.link(this);
-    
+
     ok = branch64(Equal, GPRInfo::notCellMaskRegister, TrustedImm64(JSValue::NotCellMask));
     abortWithReason(AHNotCellMaskNotInPlace);
     ok.link(this);
@@ -327,20 +327,20 @@ AssemblyHelpers::Jump AssemblyHelpers::emitExceptionCheck(VM& vm, ExceptionCheck
     if (width == FarJumpWidth)
         kind = (kind == NormalExceptionCheck ? InvertedExceptionCheck : NormalExceptionCheck);
-    
+
     Jump result;
 #if USE(JSVALUE64)
     result = branchTest64(kind == NormalExceptionCheck ? NonZero : Zero, AbsoluteAddress(vm.addressOfException()));
 #elif USE(JSVALUE32_64)
     result = branch32(kind == NormalExceptionCheck ? NotEqual : Equal, AbsoluteAddress(vm.addressOfException()), TrustedImm32(0));
 #endif
-    
+
     if (width == NormalJumpWidth)
         return result;
 
     PatchableJump realJump = patchableJump();
     result.link(this);
-    
+
     return realJump.m_jump;
 }
 
@@ -355,7 +355,7 @@ AssemblyHelpers::Jump AssemblyHelpers::emitNonPatchableExceptionCheck(VM& vm)
 #elif USE(JSVALUE32_64)
     result = branch32(NotEqual, AbsoluteAddress(vm.addressOfException()), TrustedImm32(0));
 #endif
-    
+
     return result;
 }
 
@@ -391,21 +391,21 @@ void AssemblyHelpers::emitStoreStructureWithTypeInfo(AssemblyHelpers& jit, Trust
 void AssemblyHelpers::loadProperty(GPRReg object, GPRReg offset, JSValueRegs result)
 {
     Jump isInline = branch32(LessThan, offset, TrustedImm32(firstOutOfLineOffset));
-    
+
     loadPtr(Address(object, JSObject::butterflyOffset()), result.payloadGPR());
     neg32(offset);
     signExtend32ToPtr(offset, offset);
     Jump ready = jump();
-    
+
     isInline.link(this);
     addPtr(
         TrustedImm32(
             static_cast<int32_t>(sizeof(JSObject)) - (static_cast<int32_t>(firstOutOfLineOffset) - 2) * static_cast<int32_t>(sizeof(EncodedJSValue))),
         object, result.payloadGPR());
-    
+
     ready.link(this);
-    
+
     loadValue(
         BaseIndex(
             result.payloadGPR(), offset, TimesEight, (firstOutOfLineOffset - 2) * sizeof(EncodedJSValue)),
@@ -415,21 +415,21 @@ void AssemblyHelpers::loadProperty(GPRReg object, GPRReg offset, JSValueRegs res
 void AssemblyHelpers::storeProperty(JSValueRegs value, GPRReg object, GPRReg offset, GPRReg scratch)
 {
     Jump isInline = branch32(LessThan, offset, TrustedImm32(firstOutOfLineOffset));
-    
+
     loadPtr(Address(object, JSObject::butterflyOffset()), scratch);
     neg32(offset);
     signExtend32ToPtr(offset, offset);
     Jump ready = jump();
-    
+
     isInline.link(this);
     addPtr(
         TrustedImm32(
             static_cast<int32_t>(sizeof(JSObject)) - (static_cast<int32_t>(firstOutOfLineOffset) - 2) * static_cast<int32_t>(sizeof(EncodedJSValue))),
         object, scratch);
-    
+
     ready.link(this);
-    
+
     storeValue(value, BaseIndex(scratch, offset, TimesEight, (firstOutOfLineOffset - 2) * sizeof(EncodedJSValue)));
 }
 
@@ -683,43 +683,114 @@ void AssemblyHelpers::emitAllocateWithNonNullAllocator(GPRReg resultGPR, const J
     // - We *can* use RegisterSetBuilder::macroScratchRegisters on ARM.
 
     Jump popPath;
+    Jump zeroPath;
     Jump done;
-    
+
     if (allocator.isConstant())
         move(TrustedImmPtr(allocator.allocator().localAllocator()), allocatorGPR);
 
-    load32(Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfRemaining()), resultGPR);
-    popPath = branchTest32(Zero, resultGPR);
+#if CPU(ARM) || CPU(ARM64)
+    auto dataTempRegister = getCachedDataTempRegisterIDAndInvalidate();
+#endif
+
+#if CPU(ARM64)
+    // On ARM64, we can leverage instructions like load-pair and shifted-add to make loading from the free list
+    // and extracting interval information use fewer instructions.
+
+    // Assert that we can use loadPairPtr for the interval bounds and nextInterval/secret.
+    RELEASE_ASSERT(FreeList::offsetOfIntervalEnd() - FreeList::offsetOfIntervalStart() == sizeof(uintptr_t));
+    RELEASE_ASSERT(FreeList::offsetOfSecret() - FreeList::offsetOfNextInterval() == sizeof(uintptr_t));
+
+    // Bump allocation (fast path)
+    loadPairPtr(allocatorGPR, TrustedImm32(LocalAllocator::offsetOfFreeList() + FreeList::offsetOfIntervalStart()), resultGPR, scratchGPR);
+    popPath = branchPtr(RelationalCondition::AboveOrEqual, resultGPR, scratchGPR);
+    auto bumpLabel = label();
     if (allocator.isConstant())
-        add32(TrustedImm32(-allocator.allocator().cellSize()), resultGPR, scratchGPR);
+        addPtr(TrustedImm32(allocator.allocator().cellSize()), resultGPR, scratchGPR);
     else {
-        move(resultGPR, scratchGPR);
-        sub32(Address(allocatorGPR, LocalAllocator::offsetOfCellSize()), scratchGPR);
+        load32(Address(allocatorGPR, LocalAllocator::offsetOfCellSize()), scratchGPR);
+        addPtr(resultGPR, scratchGPR);
     }
-    negPtr(resultGPR);
-    store32(scratchGPR, Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfRemaining()));
-    Address payloadEndAddr = Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfPayloadEnd());
-    addPtr(payloadEndAddr, resultGPR);
+    storePtr(scratchGPR, Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfIntervalStart()));
+    done = jump();
+
+    // Get next interval (slower path)
+    popPath.link(this);
+    loadPairPtr(allocatorGPR, TrustedImm32(LocalAllocator::offsetOfFreeList() + FreeList::offsetOfNextInterval()), resultGPR, scratchGPR);
+    zeroPath = branchTestPtr(ResultCondition::NonZero, resultGPR, TrustedImm32(1));
+    xor64(Address(resultGPR, FreeCell::offsetOfScrambledBits()), scratchGPR);
+    addSignExtend64(resultGPR, scratchGPR, dataTempRegister);
+    storePtr(dataTempRegister, Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfNextInterval()));
+    addUnsignedRightShift64(resultGPR, scratchGPR, TrustedImm32(32), scratchGPR);
+    storePtr(scratchGPR, Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfIntervalEnd()));
+    jump(bumpLabel);
+#elif CPU(X86_64)
+    // On x86_64, we can leverage better support for memory operands to directly interact with the free
+    // list instead of relying on registers as much.
+
+    // Bump allocation (fast path)
+    loadPtr(Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfIntervalStart()), resultGPR);
+    popPath = branchPtr(RelationalCondition::AboveOrEqual, resultGPR, Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfIntervalEnd()));
+    auto bumpLabel = label();
+    if (allocator.isConstant())
+        add64(TrustedImm32(allocator.allocator().cellSize()), Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfIntervalStart()));
+    else {
+        load32(Address(allocatorGPR, LocalAllocator::offsetOfCellSize()), scratchGPR);
+        add64(scratchGPR, Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfIntervalStart()));
+    }
     done = jump();
-    
+
+    // Get next interval (slower path)
     popPath.link(this);
+    loadPtr(Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfNextInterval()), resultGPR);
+    zeroPath = branchTestPtr(ResultCondition::NonZero, resultGPR, TrustedImm32(1));
+    load32(Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfSecret()), scratchGPR);
+    xor32(Address(resultGPR, FreeCell::offsetOfScrambledBits()), scratchGPR); // Lower 32 bits -> offset to next interval
+    add64(scratchGPR, Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfNextInterval()));
+    load32(Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfSecret() + 4), scratchGPR);
+    xor32(Address(resultGPR, FreeCell::offsetOfScrambledBits() + 4), scratchGPR); // Upper 32 bits -> size of interval
+    storePtr(resultGPR, Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfIntervalStart()));
+    addPtr(resultGPR, scratchGPR);
+    storePtr(scratchGPR, Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfIntervalEnd()));
+    jump(bumpLabel);
+#else
+    // Otherwise, we have a fairly general case for all other architectures here.
 
-    ASSERT(static_cast<ptrdiff_t>(LocalAllocator::offsetOfFreeList() + FreeList::offsetOfScrambledHead() + sizeof(void*)) == static_cast<ptrdiff_t>(LocalAllocator::offsetOfFreeList() + FreeList::offsetOfSecret()));
-    if constexpr (isARM64()) {
-        loadPairPtr(allocatorGPR, TrustedImm32(LocalAllocator::offsetOfFreeList() + FreeList::offsetOfScrambledHead()), resultGPR, scratchGPR);
-        xorPtr(scratchGPR, resultGPR);
+    // Bump allocation (fast path)
+    loadPtr(Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfIntervalStart()), resultGPR);
+    loadPtr(Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfIntervalEnd()), scratchGPR);
+    popPath = branchPtr(RelationalCondition::AboveOrEqual, resultGPR, scratchGPR);
+    auto bumpLabel = label();
+    if (allocator.isConstant()) {
+        move(resultGPR, scratchGPR);
+        addPtr(TrustedImm32(allocator.allocator().cellSize()), scratchGPR);
     } else {
-        loadPtr(Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfScrambledHead()), resultGPR);
-        xorPtr(Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfSecret()), resultGPR);
+        load32(Address(allocatorGPR, LocalAllocator::offsetOfCellSize()), scratchGPR);
+        addPtr(resultGPR, scratchGPR);
     }
-    slowPath.append(branchTestPtr(Zero, resultGPR));
-
-    // The object is half-allocated: we have what we know is a fresh object, but
-    // it's still on the GC's free list.
-    loadPtr(Address(resultGPR, FreeCell::offsetOfScrambledNext()), scratchGPR);
-    storePtr(scratchGPR, Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfScrambledHead()));
-    
+    storePtr(scratchGPR, Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfIntervalStart()));
+    done = jump();
+
+    // Get next interval (slower path)
+    popPath.link(this);
+    loadPtr(Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfNextInterval()), resultGPR);
+    zeroPath = branchTestPtr(ResultCondition::NonZero, resultGPR, TrustedImm32(1));
+    load32(Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfSecret()), scratchGPR);
+    xor32(Address(resultGPR, FreeCell::offsetOfScrambledBits()), scratchGPR); // Lower 32 bits -> offset to next interval
+    loadPtr(Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfNextInterval()), dataTempRegister);
+    addPtr(scratchGPR, dataTempRegister);
+    storePtr(dataTempRegister, Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfNextInterval()));
+    load32(Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfSecret() + 4), scratchGPR);
+    xor32(Address(resultGPR, FreeCell::offsetOfScrambledBits() + 4), scratchGPR); // Upper 32 bits -> size of interval
+    addPtr(resultGPR, scratchGPR);
+    storePtr(scratchGPR, Address(allocatorGPR, LocalAllocator::offsetOfFreeList() + FreeList::offsetOfIntervalEnd()));
+    jump(bumpLabel);
+#endif
+
+    zeroPath.link(this);
+    xorPtr(resultGPR, resultGPR);
+    slowPath.append(jump());
+
     done.link(this);
 }
 
@@ -738,15 +809,15 @@ void AssemblyHelpers::emitAllocate(GPRReg resultGPR, const JITAllocator& allocat
 void AssemblyHelpers::emitAllocateVariableSized(GPRReg resultGPR, CompleteSubspace& subspace, GPRReg allocationSize, GPRReg scratchGPR1, GPRReg scratchGPR2, JumpList& slowPath)
 {
     static_assert(!(MarkedSpace::sizeStep & (MarkedSpace::sizeStep - 1)), "MarkedSpace::sizeStep must be a power of two.");
-    
+
     unsigned stepShift = getLSBSet(MarkedSpace::sizeStep);
-    
+
     add32(TrustedImm32(MarkedSpace::sizeStep - 1), allocationSize, scratchGPR1);
     urshift32(TrustedImm32(stepShift), scratchGPR1);
     slowPath.append(branch32(Above, scratchGPR1, TrustedImm32(MarkedSpace::largeCutoff >> stepShift)));
     move(TrustedImmPtr(subspace.allocatorForSizeStep()), scratchGPR2);
     loadPtr(BaseIndex(scratchGPR2, scratchGPR1, ScalePtr), scratchGPR1);
-    
+
     emitAllocate(resultGPR, JITAllocator::variable(), scratchGPR1, scratchGPR2, slowPath);
 }
 
@@ -796,7 +867,7 @@ void AssemblyHelpers::restoreCalleeSavesFromEntryFrameCalleeSavesBuffer(EntryFra
         }
     }
     ASSERT(scratch != InvalidGPRReg);
-    
+
     RegisterSet skipList;
     skipList.merge(dontRestoreRegisters);
 
@@ -1410,7 +1481,7 @@ void AssemblyHelpers::emitRestoreCalleeSavesFor(const RegisterAtOffsetList* call
         JIT_COMMENT(*this, "emitRestoreCalleeSavesFor ", *calleeSaves, " dontSave: ", dontRestoreRegisters);
     else
         JIT_COMMENT(*this, "emitRestoreCalleeSavesFor");
-    
+
     LoadRegSpooler spooler(*this, framePointerRegister);
 
     unsigned i = 0;
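For reference, every backend above emits the same allocation logic; it is
semantically equivalent to this C++ rendering (illustrative only; the offset
of scrambledBits follows the FreeCell layout earlier in the patch, 8 bytes
in, after preservedBitsForCrashAnalysis):

    #include <cstdint>

    struct FreeListView { // mirrors the FreeList fields the JIT touches
        char* intervalStart;
        char* intervalEnd;
        char* nextInterval; // low bit set means sentinel: no more intervals
        uint64_t secret;
    };

    static inline void* allocateCell(FreeListView& fl, uint32_t cellSize)
    {
        if (fl.intervalStart >= fl.intervalEnd) { // popPath
            if (reinterpret_cast<uintptr_t>(fl.nextInterval) & 1)
                return nullptr; // zeroPath: return null and take the slow path
            uint64_t bits = *reinterpret_cast<uint64_t*>(fl.nextInterval + 8) ^ fl.secret;
            fl.intervalStart = fl.nextInterval;
            fl.intervalEnd = fl.nextInterval + static_cast<uint32_t>(bits >> 32);
            fl.nextInterval += static_cast<int32_t>(static_cast<uint32_t>(bits)); // signed offset
        }
        void* result = fl.intervalStart; // bumpLabel: plain bump allocation
        fl.intervalStart += cellSize;
        return result;
    }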