Skip to content

Commit

Permalink
Change SuperPMI asm diffs relocation handling (#67688)
Browse files Browse the repository at this point in the history
* Change SuperPMI asm diffs relocation handling

Add new techniques for handling relocations to prevent spurious asm diffs.
Especially, better handling for REL32 relocs on 64-bit targets, and
specifically AMD64.

With these changes, we should rarely if ever need to force REL32 relocs
to fit (there is code that already does that, as a backstop).

* Feedback

* Feedback: break out of loops once `delta` has been determined
  • Loading branch information
BruceForstall committed Apr 9, 2022
1 parent c815ca5 commit 35ff175
Show file tree
Hide file tree
Showing 6 changed files with 221 additions and 33 deletions.
20 changes: 17 additions & 3 deletions src/coreclr/tools/superpmi/superpmi-shared/asmdumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,23 @@ void ASMDumper::DumpToFile(HANDLE hFile, MethodContext* mc, CompileResult* cr)

cr->repAllocMem(&hotCodeSize, &coldCodeSize, &roDataSize, &xcptnsCount, &flag, &hotCodeBlock, &coldCodeBlock,
&roDataBlock, &orig_hotCodeBlock, &orig_coldCodeBlock, &orig_roDataBlock);
cr->applyRelocs(hotCodeBlock, hotCodeSize, orig_hotCodeBlock);
cr->applyRelocs(coldCodeBlock, coldCodeSize, orig_coldCodeBlock);
cr->applyRelocs(roDataBlock, roDataSize, orig_roDataBlock);

RelocContext rc;

rc.mc = mc;
rc.hotCodeAddress = (size_t)hotCodeBlock;
rc.hotCodeSize = hotCodeSize;
rc.coldCodeAddress = (size_t)coldCodeBlock;
rc.coldCodeSize = coldCodeSize;
rc.roDataAddress = (size_t)roDataBlock;
rc.roDataSize = roDataSize;
rc.originalHotCodeAddress = (size_t)orig_hotCodeBlock;
rc.originalColdCodeAddress = (size_t)orig_coldCodeBlock;
rc.originalRoDataAddress = (size_t)orig_roDataBlock;

cr->applyRelocs(&rc, hotCodeBlock, hotCodeSize, orig_hotCodeBlock);
cr->applyRelocs(&rc, coldCodeBlock, coldCodeSize, orig_coldCodeBlock);
cr->applyRelocs(&rc, roDataBlock, roDataSize, orig_roDataBlock);

#ifdef USE_MSVCDIS

Expand Down
163 changes: 149 additions & 14 deletions src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ void CompileResult::dmpAllocMem(DWORD key, const Agnostic_AllocMemDetails& value
value.coldCodeBlock, value.roDataBlock);
}

// We can't allocate memory in the same place is was during recording, so we pass back code/data block pointers
// We can't allocate memory at the same address used during recording, so we pass back code/data block pointers
// that point into the AllocMem LightWeightMap, but also return what the original addresses were during recording.
void CompileResult::repAllocMem(ULONG* hotCodeSize,
ULONG* coldCodeSize,
Expand Down Expand Up @@ -644,12 +644,12 @@ void CompileResult::dmpReportFatalError(DWORD key, DWORD value)
printf("ReportFatalError key Count-%u, value result-%08X", key, value);
}

void CompileResult::recRecordRelocation(void* location, void* target, WORD fRelocType, WORD slotNum, INT32 addlDelta)
void CompileResult::recRecordRelocation(void* location, void* target, uint16_t fRelocType, uint16_t slotNum, int32_t addlDelta)
{
repRecordRelocation(location, target, fRelocType, slotNum, addlDelta);
}

const char* relocationTypeToString(WORD fRelocType)
const char* relocationTypeToString(uint16_t fRelocType)
{
switch (fRelocType)
{
Expand Down Expand Up @@ -678,11 +678,11 @@ const char* relocationTypeToString(WORD fRelocType)
}
void CompileResult::dmpRecordRelocation(DWORD key, const Agnostic_RecordRelocation& value)
{
printf("RecordRelocation key %u, value loc-%016llX tgt-%016llX fRelocType-%u(%s) slotNum-%u addlDelta-%d", key,
value.location, value.target, value.fRelocType, relocationTypeToString((WORD)value.fRelocType),
value.slotNum, (INT32)value.addlDelta);
printf("RecordRelocation key %u, value loc-%016llX tgt-%016llX fRelocType-%u(%s) slotNum-%u addlDelta:%d", key,
value.location, value.target, value.fRelocType, relocationTypeToString((uint16_t)value.fRelocType),
value.slotNum, (int32_t)value.addlDelta);
}
void CompileResult::repRecordRelocation(void* location, void* target, WORD fRelocType, WORD slotNum, INT32 addlDelta)
void CompileResult::repRecordRelocation(void* location, void* target, uint16_t fRelocType, uint16_t slotNum, int32_t addlDelta)
{
if (RecordRelocation == nullptr)
RecordRelocation = new DenseLightWeightMap<Agnostic_RecordRelocation>();
Expand Down Expand Up @@ -718,7 +718,8 @@ void CompileResult::repRecordRelocation(void* location, void* target, WORD fRelo
// current section (using originalAddr), assuming we needed a jump stub. We'll let multiple calls to potentially
// different functions use the same address because even if they used different ones, and diffs were generated,
// no textual diffs would appear because most of the textual call names are "hackishMethodName".
void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* originalAddr)
//
void CompileResult::applyRelocs(RelocContext* rc, unsigned char* block1, ULONG blocksize1, void* originalAddr)
{
if (RecordRelocation == nullptr)
return;
Expand Down Expand Up @@ -758,7 +759,7 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o
size_t address = section_begin + (size_t)fixupLocation - (size_t)originalAddr;
if ((section_begin <= address) && (address < section_end)) // A reloc for our section?
{
LogDebug(" fixupLoc-%016llX (@%p) : %08X => %08X", fixupLocation, address, *(DWORD*)address,
LogDebug(" fixupLoc-%016llX (@%p) : %08X => %08X", fixupLocation, address, *(DWORD*)address,
(DWORD)tmp.target);
*(DWORD*)address = (DWORD)tmp.target;
}
Expand Down Expand Up @@ -871,7 +872,7 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o
size_t address = section_begin + (size_t)fixupLocation - (size_t)originalAddr;
if ((section_begin <= address) && (address < section_end)) // A reloc for our section?
{
LogDebug(" fixupLoc-%016llX (@%p) %016llX => %016llX", fixupLocation, address,
LogDebug(" fixupLoc-%016llX (@%p) %016llX => %016llX", fixupLocation, address,
*(DWORDLONG*)address, tmp.target);
*(DWORDLONG*)address = tmp.target;
}
Expand All @@ -891,12 +892,145 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o
size_t address = section_begin + (size_t)fixupLocation - (size_t)originalAddr;
if ((section_begin <= address) && (address < section_end)) // A reloc for our section?
{
DWORDLONG target = tmp.target + tmp.addlDelta;
DWORDLONG target = tmp.target + (int32_t)tmp.addlDelta;
DWORDLONG baseAddr = fixupLocation + sizeof(INT32);
INT64 delta = (INT64)(target - baseAddr);
bool deltaIsFinal = false;

if (IsSpmiTarget64Bit())
{
if (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_AMD64)
{
// For just AMD64:
// The VM attempts to allocate the JIT code buffer near the CLR assemblies, so 32-bit
// offsets (and REL32 relocations) can be used in the code. If this doesn't work out,
// such that a REL32 relocation doesn't fit, the VM throws away the JIT result, disables
// using REL32 relocations, and restarts compilation. SuperPMI doesn't know where the
// original compilation (during the collection) was allocated (though maybe we should
// add that to the MC, not just the CompileResult), and we don't have any control over
// where the JIT buffer is allocated. To handle this, if the getRelocTypeHint() was
// called on the target address, and the VM returned IMAGE_REL_BASED_REL32, then simply
// use the low-order 32 bits of the target address. This is unique enough for for assembly
// diffs, because the delta will compare identically and won't be dependent on where
// SuperPMI allocated the JIT memory.

if (rc->mc->GetRelocTypeHint != nullptr)
{
DWORDLONG key = tmp.target;
int index = rc->mc->GetRelocTypeHint->GetIndex(key);
if (index == -1)
{
// See if the original address is in the replay address map. This happens for
// relocations on static field addresses found via getFieldAddress().
void* origAddr = repAddressMap((void*)tmp.target);
if ((origAddr != (void*)-1) && (origAddr != nullptr))
{
key = CastPointer(origAddr);
index = rc->mc->GetRelocTypeHint->GetIndex(key);
if (index != -1)
{
LogDebug(" Using address map: target %016llX, original target %016llX",
tmp.target, key);
}
}
}

if (index != -1)
{
WORD retVal = (WORD)rc->mc->GetRelocTypeHint->Get(key);
if (retVal == IMAGE_REL_BASED_REL32)
{
LogDebug(" REL32 target used as argument to getRelocTypeHint: setting delta=%d (0x%X)",
(int)key, (int)key);
delta = (INT64)(int)key;
deltaIsFinal = true;
}
}
}
}

if (!deltaIsFinal)
{
// Check if tmp.target is the result of a call to getHelperFtn(). If so, the VM would create a
// jump stub if the REL32 address doesn't fit. We don't want to fail with a REL32 overflow if
// the actual target address doesn't fit, so use the low-order 32 bits of the address.
// We need to iterate the entire table since we don't know the helper function id.

if (rc->mc->GetHelperFtn != nullptr)
{
for (unsigned int idx = 0; idx < rc->mc->GetHelperFtn->GetCount(); idx++)
{
DLDL value = rc->mc->GetHelperFtn->GetItem(idx);
if (value.B == tmp.target)
{
LogDebug(" REL32 target is result of getHelperFtn(): setting delta=%d (0x%X)",
(int)tmp.target, (int)tmp.target);
delta = (INT64)(int)tmp.target;
deltaIsFinal = true;
break; // No need to consider the remaining GetHelperFtn entries
}
}
}
}

if (!deltaIsFinal)
{
// Check if tmp.target is the result of a call to GetFunctionEntryPoint(). As for helper
// functions, above, the VM would create a jump stub if the REL32 address doesn't fit.

if (rc->mc->GetFunctionEntryPoint != nullptr)
{
for (unsigned int idx = 0; idx < rc->mc->GetFunctionEntryPoint->GetCount(); idx++)
{
DLD value = rc->mc->GetFunctionEntryPoint->GetItem(idx);
if (value.A == tmp.target)
{
LogDebug(" REL32 target is result of getFunctionEntryPoint(): setting delta=%d (0x%X)",
(int)tmp.target, (int)tmp.target);
delta = (INT64)(int)tmp.target;
deltaIsFinal = true;
break; // No need to consider the remaining GetFunctionEntryPoint entries
}
}
}
}

if (!deltaIsFinal)
{
// If the relocation points to the RO-data section, we need to be careful that the relocation
// fits in 32-bits for both the baseline and diff compilations. To do that, we pretend the RO
// data section exists immediately after the current code section.

if ((rc->originalRoDataAddress <= (size_t)target) &&
((size_t)target < rc->originalRoDataAddress + rc->roDataSize))
{
size_t ro_section_offset = (size_t)target - rc->originalRoDataAddress;
size_t ro_section_fake_start = (size_t)-1;

// Looks like the target is in the RO data section.
if ((rc->originalHotCodeAddress <= (size_t)fixupLocation) &&
((size_t)fixupLocation < rc->originalHotCodeAddress + rc->hotCodeSize))
{
// Fixup location is in the hot section
ro_section_fake_start = rc->originalHotCodeAddress + rc->hotCodeSize;
delta = (INT64)(ro_section_fake_start + ro_section_offset - baseAddr);
deltaIsFinal = true;
LogDebug(" REL32 hot code target is in RO data section: setting delta=%d (0x%X)",
delta, delta);
}
else if ((rc->originalColdCodeAddress <= (size_t)fixupLocation) &&
((size_t)fixupLocation < rc->originalColdCodeAddress + rc->coldCodeSize))
{
// Fixup location is in the cold section
ro_section_fake_start = rc->originalColdCodeAddress + rc->coldCodeSize;
delta = (INT64)(ro_section_fake_start + ro_section_offset - baseAddr);
deltaIsFinal = true;
LogDebug(" REL32 cold code target is in RO data section: setting delta=%d (0x%X)",
delta, delta);
}
}
}

if (delta != (INT64)(int)delta)
{
// This isn't going to fit in a signed 32-bit address. Use something that will fit,
Expand All @@ -905,7 +1039,8 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o
target = (DWORDLONG)originalAddr + (DWORDLONG)blocksize1;
INT64 newdelta = (INT64)(target - baseAddr);

LogDebug(" REL32 overflow. Mapping target to %016llX. Mapping delta: %016llX => %016llX", target, delta, newdelta);
LogDebug(" REL32 overflow. Mapping target to %016llX. Mapping delta: %016llX => %016llX",
target, delta, newdelta);

delta = newdelta;
}
Expand All @@ -916,7 +1051,7 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o
LogError("REL32 relocation overflows field! delta=0x%016llX", delta);
}

if (targetArch == SPMI_TARGET_ARCHITECTURE_AMD64)
if ((targetArch == SPMI_TARGET_ARCHITECTURE_AMD64) && !deltaIsFinal)
{
// During an actual compile, recordRelocation() will be called before the compile
// is actually finished, and it will write the relative offset into the fixupLocation.
Expand All @@ -929,7 +1064,7 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o
}

// Write 32-bits into location
LogDebug(" fixupLoc-%016llX (@%p) : %08X => %08X", fixupLocation, address, *(DWORD*)address, delta);
LogDebug(" fixupLoc-%016llX (@%p) : %08X => %08X", fixupLocation, address, *(DWORD*)address, delta);
*(DWORD*)address = (DWORD)delta;
}

Expand Down
23 changes: 20 additions & 3 deletions src/coreclr/tools/superpmi/superpmi-shared/compileresult.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "lightweightmap.h"
#include "agnostic.h"

class MethodContext;

// MemoryTracker: a very simple allocator and tracker of allocated memory, so it can be deleted when needed.
class MemoryTracker
{
Expand Down Expand Up @@ -52,6 +54,21 @@ class MemoryTracker
MemoryNode* m_pHead;
};

// Data we need to process relocations properly.
struct RelocContext
{
MethodContext* mc;
size_t hotCodeAddress;
size_t hotCodeSize;
size_t coldCodeAddress;
size_t coldCodeSize;
size_t roDataAddress;
size_t roDataSize;
size_t originalHotCodeAddress;
size_t originalColdCodeAddress;
size_t originalRoDataAddress;
};

class CompileResult
{
public:
Expand Down Expand Up @@ -161,10 +178,10 @@ class CompileResult
void recReportFatalError(CorJitResult result);
void dmpReportFatalError(DWORD key, DWORD value);

void recRecordRelocation(void* location, void* target, WORD fRelocType, WORD slotNum, INT32 addlDelta);
void recRecordRelocation(void* location, void* target, uint16_t fRelocType, uint16_t slotNum, int32_t addlDelta);
void dmpRecordRelocation(DWORD key, const Agnostic_RecordRelocation& value);
void repRecordRelocation(void* location, void* target, WORD fRelocType, WORD slotNum, INT32 addlDelta);
void applyRelocs(unsigned char* block1, ULONG blocksize1, void* originalAddr);
void repRecordRelocation(void* location, void* target, uint16_t fRelocType, uint16_t slotNum, int32_t addlDelta);
void applyRelocs(RelocContext* rc, unsigned char* block1, ULONG blocksize1, void* originalAddr);

void recProcessName(const char* name);
void dmpProcessName(DWORD key, DWORD value);
Expand Down
10 changes: 3 additions & 7 deletions src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2343,13 +2343,9 @@ void* MethodContext::repGetHelperFtn(CorInfoHelpFunc ftnNum, void** ppIndirectio
// Return Value:
// True if there is a helper function associated with the given target address; false otherwise.
//
// Assumptions:
// Only the lower 32 bits of the method address are necessary to identify the method.
//
// Notes:
// - See notes for fndGetFunctionEntryPoint for a more in-depth discussion of why we only match on the
// lower 32 bits of the target address.
// - This might not work correctly with method contexts recorded via NGen compilation.
// - This might not work correctly with method contexts recorded via NGen compilation; it doesn't compare
// the ppIndirection value.
//
bool MethodContext::fndGetHelperFtn(void* functionAddress, CorInfoHelpFunc* pResult)
{
Expand Down Expand Up @@ -2487,7 +2483,7 @@ void MethodContext::repGetFunctionEntryPoint(CORINFO_METHOD_HANDLE ftn,
// handle associated with the given target address, it will be written to here.
//
// Return Value:
// True if there is a helper function associated with the given target address; false otherwise.
// True if there is a function associated with the given target address; false otherwise.
//
// Assumptions:
// - The given method address does not point to a jump stub.
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ static_assert((int)EXTRA_JIT_FLAGS::HAS_DYNAMIC_PROFILE == (int)CORJIT_FLAGS::Co

class MethodContext
{
friend class CompileResult;

public:
MethodContext();

Expand Down
Loading

0 comments on commit 35ff175

Please sign in to comment.