Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 148 additions & 41 deletions src/coreclr/vm/codeman.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,40 @@ static RtlGrowFunctionTableFnPtr pRtlGrowFunctionTable;
static RtlDeleteGrowableFunctionTableFnPtr pRtlDeleteGrowableFunctionTable;

static bool s_publishingActive; // Publishing to ETW is turned on
static Crst* s_pUnwindInfoTablePublishLock; // Protects main table, OS registration, and lazy init
static Crst* s_pUnwindInfoTablePendingLock; // Protects pending buffer only

namespace
{
// RAII helper used by UnwindInfoTable::FlushPendingEntries
class FlushGateHolder
{
UnwindInfoTable* m_pTable;
public:
explicit FlushGateHolder(UnwindInfoTable* pTable) : m_pTable(pTable) {}
~FlushGateHolder() { m_pTable->ReleaseFlushGate(); }

FlushGateHolder(const FlushGateHolder&) = delete;
FlushGateHolder& operator=(const FlushGateHolder&) = delete;
FlushGateHolder(FlushGateHolder&&) = delete;
FlushGateHolder& operator=(FlushGateHolder&&) = delete;
};

int __cdecl CompareByBeginAddress(const void* a, const void* b)
{
CONTRACTL
{
NOTHROW;
GC_NOTRIGGER;
FORBID_FAULT;
}
CONTRACTL_END;

DWORD aBegin = ((const T_RUNTIME_FUNCTION*)a)->BeginAddress;
DWORD bBegin = ((const T_RUNTIME_FUNCTION*)b)->BeginAddress;
if (aBegin < bBegin) return -1;
if (aBegin > bBegin) return 1;
return 0;
}
}

/****************************************************************************/
// initialize the entry points for new win8 unwind info publishing functions.
Expand Down Expand Up @@ -135,9 +167,10 @@ bool InitUnwindFtns()

/****************************************************************************/
UnwindInfoTable::UnwindInfoTable(ULONG_PTR rangeStart, ULONG_PTR rangeEnd)
: m_publishLock(CrstUnwindInfoTablePublishLock)
, m_pendingLock(CrstUnwindInfoTablePendingLock)
{
STANDARD_VM_CONTRACT;
_ASSERTE(s_pUnwindInfoTablePublishLock->OwnedByCurrentThread());
_ASSERTE((rangeEnd - rangeStart) <= 0x7FFFFFFF);

// We can choose the average method size estimate dynamically based on past experience
Expand All @@ -156,6 +189,8 @@ UnwindInfoTable::UnwindInfoTable(ULONG_PTR rangeStart, ULONG_PTR rangeEnd)
hHandle = NULL;
pTable = new T_RUNTIME_FUNCTION[cTableMaxCount];
cPendingCount = 0;
m_flushInProgress = 0;
m_registrationFailed = false;
}

/****************************************************************************/
Expand All @@ -176,7 +211,8 @@ UnwindInfoTable::~UnwindInfoTable()
/*****************************************************************************/
void UnwindInfoTable::Register()
{
_ASSERTE(s_pUnwindInfoTablePublishLock->OwnedByCurrentThread());
// Caller either holds m_publishLock, or has just constructed this object
// and has not yet exposed it to other threads.

NTSTATUS ret = pRtlAddGrowableFunctionTable(&hHandle, pTable, cTableCurCount, cTableMaxCount, iRangeStart, iRangeEnd);
if (ret != STATUS_SUCCESS)
Expand Down Expand Up @@ -205,7 +241,7 @@ void UnwindInfoTable::UnRegister()

/*****************************************************************************/
// Add 'data' entries to the pending buffer for later publication to the OS.
// When the buffer is full, entries are flushed under s_pUnwindInfoTablePublishLock.
// When the buffer is full, entries are flushed under m_publishLock.
//
void UnwindInfoTable::AddToUnwindInfoTable(PT_RUNTIME_FUNCTION data, int count)
{
Expand All @@ -218,10 +254,13 @@ void UnwindInfoTable::AddToUnwindInfoTable(PT_RUNTIME_FUNCTION data, int count)

_ASSERTE(s_publishingActive);

if (m_registrationFailed)
return;

for (int i = 0; i < count; )
{
{
CrstHolder pendingLock(s_pUnwindInfoTablePendingLock);
CrstHolder pendingLock(&m_pendingLock);
while (i < count && cPendingCount < cPendingMaxCount)
{
_ASSERTE(data[i].BeginAddress <= RUNTIME_FUNCTION__EndAddress(&data[i], iRangeStart));
Expand All @@ -241,26 +280,26 @@ void UnwindInfoTable::AddToUnwindInfoTable(PT_RUNTIME_FUNCTION data, int count)
}
}

/*****************************************************************************/
void UnwindInfoTable::FlushPendingEntries()
void UnwindInfoTable::FlushPendingEntriesUnderGate()
{
CONTRACTL
{
THROWS;
GC_TRIGGERS;
PRECONDITION(m_flushInProgress != 0);
}
CONTRACTL_END;

// Free the old table outside the lock
NewArrayHolder<T_RUNTIME_FUNCTION> oldPTable;

CrstHolder publishLock(s_pUnwindInfoTablePublishLock);
CrstHolder publishLock(&m_publishLock);

if (hHandle == NULL)
{
// If hHandle is null, it means Register() failed. Skip flushing to avoid calling
// RtlGrowFunctionTable with a null handle.
CrstHolder pendingLock(s_pUnwindInfoTablePendingLock);
CrstHolder pendingLock(&m_pendingLock);
cPendingCount = 0;
return;
}
Expand All @@ -270,7 +309,7 @@ void UnwindInfoTable::FlushPendingEntries()
T_RUNTIME_FUNCTION localPending[cPendingMaxCount];
ULONG localPendingCount;
{
CrstHolder pendingLock(s_pUnwindInfoTablePendingLock);
CrstHolder pendingLock(&m_pendingLock);
localPendingCount = cPendingCount;
memcpy(localPending, pendingTable, cPendingCount * sizeof(T_RUNTIME_FUNCTION));
cPendingCount = 0;
Expand All @@ -281,20 +320,8 @@ void UnwindInfoTable::FlushPendingEntries()
return;

// Sort the pending entries by BeginAddress.
// Use a simple insertion sort since cPendingMaxCount is small (32).
static_assert(cPendingMaxCount == 32,
"cPendingMaxCount was updated and might be too large for insertion sort, consider using a better algorithm");
for (ULONG i = 1; i < localPendingCount; i++)
{
T_RUNTIME_FUNCTION key = localPending[i];
ULONG j = i;
while (j > 0 && localPending[j - 1].BeginAddress > key.BeginAddress)
{
localPending[j] = localPending[j - 1];
j--;
}
localPending[j] = key;
}
qsort(localPending, localPendingCount, sizeof(T_RUNTIME_FUNCTION),
CompareByBeginAddress);

// Fast path: if all pending entries can be appended in order with room to spare,
// we can just append and call RtlGrowFunctionTable.
Expand Down Expand Up @@ -380,6 +407,46 @@ void UnwindInfoTable::FlushPendingEntries()
Register();
}

/*****************************************************************************/
void UnwindInfoTable::FlushPendingEntries()
{
CONTRACTL
{
THROWS;
GC_TRIGGERS;
}
CONTRACTL_END;

// This thread attempts to become the sole flusher for this table by taking
// the flush gate. If it wins, publish the pending entries, then release the gate,
// re-check if more entries arrived and loop if so.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do the other threads need to wait for their entries to be flushed?

I do not see any code that does that.

Copy link
Copy Markdown
Member Author

@eduardo-vp eduardo-vp May 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After AddToUnwindInfoTable finishes, they may need to wait a minimum amount of time. A thread that finds m_flushInProgress set to 1 (there's a flushing thread) just returns and waits for the flushing thread to just loop again and pick up its entries. Some threads essentially defer the work to another thread in this version as opposed to every thread taking the lock and publishing (which generated a lot of contention).

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A thread that finds m_flushInProgress set to 1 (there's a flushing thread) just returns and waits for the flushing thread to just loop again

What's the line of code that makes it wait?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah it doesn't actually wait/block - I meant there's some time until it gets published. The thread just continues.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It means that the tracing stacktraces on the thread that continues are going to be broken until the flushing thread catches up. This change is regressing tracing reliability.

while (true)
{
// Gate: only one thread flushes at a time.
if (InterlockedCompareExchange(&m_flushInProgress, 1, 0) != 0)
{
return;
}

// Scope the gate holder so m_flushInProgress is reset when the scope exits.
{
FlushGateHolder gateHolder(this);
FlushPendingEntriesUnderGate();
}

// Re-check pending table under m_pendingLock.
// We only stop the retry loop if we see no more pending entries.
// If producer threads enqueued after we determined we should break the loop,
// at least one will surely be able to set m_flushInProgress to 1 and publish its entry.
bool needRetry;
{
CrstHolder pendingLock(&m_pendingLock);
needRetry = (cPendingCount != 0);
}
if (!needRetry) return;
}
}

/*****************************************************************************/
/* static */ void UnwindInfoTable::RemoveFromUnwindInfoTable(UnwindInfoTable** unwindInfoPtr, TADDR baseAddress, TADDR entryPoint)
{
Expand All @@ -403,22 +470,53 @@ void UnwindInfoTable::FlushPendingEntries()
entryPoint, baseAddress, relativeEntryPoint);

// Check the main (published) table under the publish lock.
// We don't need to check the pending buffer because the method should have already been published
// before it can be removed.
// If it wasn't found there, check the pending buffer.
// Most removes hit in the published table so this avoids taking the pending lock unnecessarily.
{
CrstHolder publishLock(s_pUnwindInfoTablePublishLock);
for (ULONG i = 0; i < unwindInfo->cTableCurCount; i++)
CrstHolder publishLock(&unwindInfo->m_publishLock);

// pTable is kept sorted by BeginAddress. Soft-deleted entries keep their BeginAddress
// intact, so the array remains sorted across removes. Binary search for the largest index
// with BeginAddress <= relativeEntryPoint, then check against EndAddress.
ULONG lo = 0;
ULONG hi = unwindInfo->cTableCurCount;
while (lo < hi)
{
if (unwindInfo->pTable[i].BeginAddress <= relativeEntryPoint &&
ULONG mid = lo + (hi - lo) / 2;
if (unwindInfo->pTable[mid].BeginAddress <= relativeEntryPoint)
lo = mid + 1;
else
hi = mid;
}

if (lo > 0)
{
ULONG i = lo - 1;
if (unwindInfo->pTable[i].UnwindData != 0 &&
relativeEntryPoint < RUNTIME_FUNCTION__EndAddress(&unwindInfo->pTable[i], unwindInfo->iRangeStart))
{
if (unwindInfo->pTable[i].UnwindData != 0)
unwindInfo->cDeletedEntries++;
unwindInfo->cDeletedEntries++;
unwindInfo->pTable[i].UnwindData = 0; // Mark the entry for deletion
STRESS_LOG1(LF_JIT, LL_INFO100, "RemoveFromUnwindInfoTable Removed entry 0x%x\n", i);
return;
}
}

// Check the pending buffer while still holding publishLock so the
// flusher cannot drain entries out from under us between the two searches.
{
CrstHolder pendingLock(&unwindInfo->m_pendingLock);
for (ULONG i = 0; i < unwindInfo->cPendingCount; i++)
{
if (unwindInfo->pendingTable[i].BeginAddress <= relativeEntryPoint &&
relativeEntryPoint < RUNTIME_FUNCTION__EndAddress(&unwindInfo->pendingTable[i], unwindInfo->iRangeStart))
{
unwindInfo->pendingTable[i] = unwindInfo->pendingTable[--unwindInfo->cPendingCount];
STRESS_LOG1(LF_JIT, LL_INFO100, "RemoveFromUnwindInfoTable Removed pending entry 0x%x\n", i);
return;
}
}
}
}

STRESS_LOG2(LF_JIT, LL_WARNING, "RemoveFromUnwindInfoTable COULD NOT FIND %p BaseAddress %p\n",
Expand All @@ -442,16 +540,28 @@ void UnwindInfoTable::FlushPendingEntries()
UnwindInfoTable* unwindInfo = VolatileLoad(&pRS->_pUnwindInfoTable);
if (unwindInfo == NULL)
{
CrstHolder publishLock(s_pUnwindInfoTablePublishLock);
if (pRS->_pUnwindInfoTable == NULL)
// Create a candidate table for this RangeSection and try to install it via CAS.
// Any losing thread (if any) deletes its candidate table. This avoids
// a process-wide lock on the cold path.
NewHolder<UnwindInfoTable> candidate(
new UnwindInfoTable(pRS->_range.RangeStart(), pRS->_range.RangeEndOpen()));
candidate->Register();
if (candidate->hHandle == NULL)
{
candidate->m_registrationFailed = true;
}

UnwindInfoTable* installed = InterlockedCompareExchangeT(
&pRS->_pUnwindInfoTable, candidate.GetValue(), (UnwindInfoTable*)NULL);
if (installed == NULL)
{
unwindInfo = new UnwindInfoTable(pRS->_range.RangeStart(), pRS->_range.RangeEndOpen());
unwindInfo->Register();
VolatileStore(&pRS->_pUnwindInfoTable, unwindInfo);
unwindInfo = candidate.Extract();
}
else
{
unwindInfo = pRS->_pUnwindInfoTable;
// Our candidate will be destroyed by the NewHolder,
// which also tears down the OS registration via ~UnwindInfoTable.
unwindInfo = installed;
}
}

Expand Down Expand Up @@ -506,9 +616,6 @@ void UnwindInfoTable::FlushPendingEntries()
if (!InitUnwindFtns())
return;

// Create the locks
s_pUnwindInfoTablePublishLock = new Crst(CrstUnwindInfoTablePublishLock);
s_pUnwindInfoTablePendingLock = new Crst(CrstUnwindInfoTablePendingLock);
s_publishingActive = true;
}

Expand Down
18 changes: 17 additions & 1 deletion src/coreclr/vm/codeman.h
Original file line number Diff line number Diff line change
Expand Up @@ -632,8 +632,12 @@ class UnwindInfoTable final
void Register();
UnwindInfoTable(ULONG_PTR rangeStart, ULONG_PTR rangeEnd);

public:
void ReleaseFlushGate() { InterlockedExchange(&m_flushInProgress, 0); }

private:
void FlushPendingEntries();
void FlushPendingEntriesUnderGate();

PVOID hHandle; // OS handle for a published RUNTIME_FUNCTION table
ULONG_PTR iRangeStart; // Start of memory described by this table
Expand All @@ -646,9 +650,21 @@ class UnwindInfoTable final
// Pending buffer for out-of-order entries that haven't been published to the OS yet.
// These entries are accumulated and batch-merged into pTable to amortize the cost of
// RtlDeleteGrowableFunctionTable + RtlAddGrowableFunctionTable.
static const ULONG cPendingMaxCount = 32;
static const ULONG cPendingMaxCount = 128;
T_RUNTIME_FUNCTION pendingTable[cPendingMaxCount];
ULONG cPendingCount;

// Per-table locks. Each UnwindInfoTable corresponds to one RangeSection, and
// independent RangeSections can publish/unpublish concurrently.
Crst m_publishLock; // Protects the main table and OS registration.
Crst m_pendingLock; // Protects the pending table only.

Volatile<LONG> m_flushInProgress;

// Whether initial OS registration failed, used to return early in AddToUnwindInfoTable.
// We cannot just check if hHandle is null because it's temporarily set to NULL
// during the flusher's merge path.
bool m_registrationFailed;
#endif // defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
};

Expand Down
15 changes: 10 additions & 5 deletions src/coreclr/vm/dynamicmethod.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,8 @@ bool DynamicMethodDesc::TryDestroy()
void LCGMethodResolver::Reset()
{
m_DynamicStringLiterals = NULL;
m_DynamicCodePointers = NULL;
m_initialCodePointer = {};
m_DynamicCodePointers = &m_initialCodePointer;
m_UsedIndCellList = NULL;
m_pJumpStubCache = NULL;
m_next = NULL;
Expand Down Expand Up @@ -1583,10 +1584,14 @@ void** LCGMethodResolver::AllocateRecordCodePointer()
}
CONTRACTL_END;

DynamicCodePointer* codePointer = (DynamicCodePointer*)m_jitTempData.New(sizeof(DynamicCodePointer));
*codePointer = {};
codePointer->m_pNext = m_DynamicCodePointers;
m_DynamicCodePointers = codePointer;
DynamicCodePointer* codePointer = &m_initialCodePointer;
if (codePointer->m_pEntry != NULL)
{
codePointer = (DynamicCodePointer*)m_jitTempData.New(sizeof(DynamicCodePointer));
*codePointer = {};
codePointer->m_pNext = m_DynamicCodePointers;
m_DynamicCodePointers = codePointer;
}

return &codePointer->m_pEntry;
}
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/vm/dynamicmethod.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ class LCGMethodResolver final : public DynamicResolver
LCGMethodResolver(DynamicMethodDesc* pDynamicMethod, DynamicMethodTable* pDynamicMethodTable)
: m_pDynamicMethod{ pDynamicMethod }
, m_pDynamicMethodTable{ pDynamicMethodTable }
, m_initialCodePointer{}
, m_DynamicCodePointers{ &m_initialCodePointer }
{
LIMITED_METHOD_CONTRACT;
}
Expand Down Expand Up @@ -227,6 +229,10 @@ class LCGMethodResolver final : public DynamicResolver
DynamicMethodDesc* m_next;
ChunkAllocator m_jitMetaHeap;
ChunkAllocator m_jitTempData;
// m_DynamicCodePointers has its entries allocated in chunks (via m_jitTempData).
// Having a field for the initial code pointer avoids the chunk allocation in the common
// case of a single code pointer. We want to avoid allocations while taking m_CodeHeapLock to reduce contention.
DynamicCodePointer m_initialCodePointer;
DynamicCodePointer* m_DynamicCodePointers;
DynamicStringLiteral* m_DynamicStringLiterals;
IndCellList* m_UsedIndCellList; // list to keep track of all the indirection cells used by the jitted code
Expand Down
Loading