diff --git a/src/coreclr/tools/superpmi/superpmi-shared/asmdumper.cpp b/src/coreclr/tools/superpmi/superpmi-shared/asmdumper.cpp index 0e22c49455910..b524addc2d383 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/asmdumper.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/asmdumper.cpp @@ -37,9 +37,23 @@ void ASMDumper::DumpToFile(HANDLE hFile, MethodContext* mc, CompileResult* cr) cr->repAllocMem(&hotCodeSize, &coldCodeSize, &roDataSize, &xcptnsCount, &flag, &hotCodeBlock, &coldCodeBlock, &roDataBlock, &orig_hotCodeBlock, &orig_coldCodeBlock, &orig_roDataBlock); - cr->applyRelocs(hotCodeBlock, hotCodeSize, orig_hotCodeBlock); - cr->applyRelocs(coldCodeBlock, coldCodeSize, orig_coldCodeBlock); - cr->applyRelocs(roDataBlock, roDataSize, orig_roDataBlock); + + RelocContext rc; + + rc.mc = mc; + rc.hotCodeAddress = (size_t)hotCodeBlock; + rc.hotCodeSize = hotCodeSize; + rc.coldCodeAddress = (size_t)coldCodeBlock; + rc.coldCodeSize = coldCodeSize; + rc.roDataAddress = (size_t)roDataBlock; + rc.roDataSize = roDataSize; + rc.originalHotCodeAddress = (size_t)orig_hotCodeBlock; + rc.originalColdCodeAddress = (size_t)orig_coldCodeBlock; + rc.originalRoDataAddress = (size_t)orig_roDataBlock; + + cr->applyRelocs(&rc, hotCodeBlock, hotCodeSize, orig_hotCodeBlock); + cr->applyRelocs(&rc, coldCodeBlock, coldCodeSize, orig_coldCodeBlock); + cr->applyRelocs(&rc, roDataBlock, roDataSize, orig_roDataBlock); #ifdef USE_MSVCDIS diff --git a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp index 547f5f5789290..65970a0a1d354 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.cpp @@ -176,7 +176,7 @@ void CompileResult::dmpAllocMem(DWORD key, const Agnostic_AllocMemDetails& value value.coldCodeBlock, value.roDataBlock); } -// We can't allocate memory in the same place is was during recording, so we pass back code/data 
block pointers +// We can't allocate memory at the same address used during recording, so we pass back code/data block pointers // that point into the AllocMem LightWeightMap, but also return what the original addresses were during recording. void CompileResult::repAllocMem(ULONG* hotCodeSize, ULONG* coldCodeSize, @@ -644,12 +644,12 @@ void CompileResult::dmpReportFatalError(DWORD key, DWORD value) printf("ReportFatalError key Count-%u, value result-%08X", key, value); } -void CompileResult::recRecordRelocation(void* location, void* target, WORD fRelocType, WORD slotNum, INT32 addlDelta) +void CompileResult::recRecordRelocation(void* location, void* target, uint16_t fRelocType, uint16_t slotNum, int32_t addlDelta) { repRecordRelocation(location, target, fRelocType, slotNum, addlDelta); } -const char* relocationTypeToString(WORD fRelocType) +const char* relocationTypeToString(uint16_t fRelocType) { switch (fRelocType) { @@ -678,11 +678,11 @@ const char* relocationTypeToString(WORD fRelocType) } void CompileResult::dmpRecordRelocation(DWORD key, const Agnostic_RecordRelocation& value) { - printf("RecordRelocation key %u, value loc-%016llX tgt-%016llX fRelocType-%u(%s) slotNum-%u addlDelta-%d", key, - value.location, value.target, value.fRelocType, relocationTypeToString((WORD)value.fRelocType), - value.slotNum, (INT32)value.addlDelta); + printf("RecordRelocation key %u, value loc-%016llX tgt-%016llX fRelocType-%u(%s) slotNum-%u addlDelta:%d", key, + value.location, value.target, value.fRelocType, relocationTypeToString((uint16_t)value.fRelocType), + value.slotNum, (int32_t)value.addlDelta); } -void CompileResult::repRecordRelocation(void* location, void* target, WORD fRelocType, WORD slotNum, INT32 addlDelta) +void CompileResult::repRecordRelocation(void* location, void* target, uint16_t fRelocType, uint16_t slotNum, int32_t addlDelta) { if (RecordRelocation == nullptr) RecordRelocation = new DenseLightWeightMap(); @@ -718,7 +718,8 @@ void 
CompileResult::repRecordRelocation(void* location, void* target, WORD fRelo // current section (using originalAddr), assuming we needed a jump stub. We'll let multiple calls to potentially // different functions use the same address because even if they used different ones, and diffs were generated, // no textual diffs would appear because most of the textual call names are "hackishMethodName". -void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* originalAddr) +// +void CompileResult::applyRelocs(RelocContext* rc, unsigned char* block1, ULONG blocksize1, void* originalAddr) { if (RecordRelocation == nullptr) return; @@ -758,7 +759,7 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o size_t address = section_begin + (size_t)fixupLocation - (size_t)originalAddr; if ((section_begin <= address) && (address < section_end)) // A reloc for our section? { - LogDebug(" fixupLoc-%016llX (@%p) : %08X => %08X", fixupLocation, address, *(DWORD*)address, + LogDebug(" fixupLoc-%016llX (@%p) : %08X => %08X", fixupLocation, address, *(DWORD*)address, (DWORD)tmp.target); *(DWORD*)address = (DWORD)tmp.target; } @@ -871,7 +872,7 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o size_t address = section_begin + (size_t)fixupLocation - (size_t)originalAddr; if ((section_begin <= address) && (address < section_end)) // A reloc for our section? { - LogDebug(" fixupLoc-%016llX (@%p) %016llX => %016llX", fixupLocation, address, + LogDebug(" fixupLoc-%016llX (@%p) %016llX => %016llX", fixupLocation, address, *(DWORDLONG*)address, tmp.target); *(DWORDLONG*)address = tmp.target; } @@ -891,12 +892,145 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o size_t address = section_begin + (size_t)fixupLocation - (size_t)originalAddr; if ((section_begin <= address) && (address < section_end)) // A reloc for our section? 
{ - DWORDLONG target = tmp.target + tmp.addlDelta; + DWORDLONG target = tmp.target + (int32_t)tmp.addlDelta; DWORDLONG baseAddr = fixupLocation + sizeof(INT32); INT64 delta = (INT64)(target - baseAddr); + bool deltaIsFinal = false; if (IsSpmiTarget64Bit()) { + if (GetSpmiTargetArchitecture() == SPMI_TARGET_ARCHITECTURE_AMD64) + { + // For just AMD64: + // The VM attempts to allocate the JIT code buffer near the CLR assemblies, so 32-bit + // offsets (and REL32 relocations) can be used in the code. If this doesn't work out, + // such that a REL32 relocation doesn't fit, the VM throws away the JIT result, disables + // using REL32 relocations, and restarts compilation. SuperPMI doesn't know where the + // original compilation (during the collection) was allocated (though maybe we should + // add that to the MC, not just the CompileResult), and we don't have any control over + // where the JIT buffer is allocated. To handle this, if the getRelocTypeHint() was + // called on the target address, and the VM returned IMAGE_REL_BASED_REL32, then simply + // use the low-order 32 bits of the target address. This is unique enough for assembly + // diffs, because the delta will compare identically and won't be dependent on where + // SuperPMI allocated the JIT memory. + + if (rc->mc->GetRelocTypeHint != nullptr) + { + DWORDLONG key = tmp.target; + int index = rc->mc->GetRelocTypeHint->GetIndex(key); + if (index == -1) + { + // See if the original address is in the replay address map. This happens for + // relocations on static field addresses found via getFieldAddress(). 
+ void* origAddr = repAddressMap((void*)tmp.target); + if ((origAddr != (void*)-1) && (origAddr != nullptr)) + { + key = CastPointer(origAddr); + index = rc->mc->GetRelocTypeHint->GetIndex(key); + if (index != -1) + { + LogDebug(" Using address map: target %016llX, original target %016llX", + tmp.target, key); + } + } + } + + if (index != -1) + { + WORD retVal = (WORD)rc->mc->GetRelocTypeHint->Get(key); + if (retVal == IMAGE_REL_BASED_REL32) + { + LogDebug(" REL32 target used as argument to getRelocTypeHint: setting delta=%d (0x%X)", + (int)key, (int)key); + delta = (INT64)(int)key; + deltaIsFinal = true; + } + } + } + } + + if (!deltaIsFinal) + { + // Check if tmp.target is the result of a call to getHelperFtn(). If so, the VM would create a + // jump stub if the REL32 address doesn't fit. We don't want to fail with a REL32 overflow if + // the actual target address doesn't fit, so use the low-order 32 bits of the address. + // We need to iterate the entire table since we don't know the helper function id. + + if (rc->mc->GetHelperFtn != nullptr) + { + for (unsigned int idx = 0; idx < rc->mc->GetHelperFtn->GetCount(); idx++) + { + DLDL value = rc->mc->GetHelperFtn->GetItem(idx); + if (value.B == tmp.target) + { + LogDebug(" REL32 target is result of getHelperFtn(): setting delta=%d (0x%X)", + (int)tmp.target, (int)tmp.target); + delta = (INT64)(int)tmp.target; + deltaIsFinal = true; + break; // No need to consider the remaining GetHelperFtn entries + } + } + } + } + + if (!deltaIsFinal) + { + // Check if tmp.target is the result of a call to GetFunctionEntryPoint(). As for helper + // functions, above, the VM would create a jump stub if the REL32 address doesn't fit. 
+ + if (rc->mc->GetFunctionEntryPoint != nullptr) + { + for (unsigned int idx = 0; idx < rc->mc->GetFunctionEntryPoint->GetCount(); idx++) + { + DLD value = rc->mc->GetFunctionEntryPoint->GetItem(idx); + if (value.A == tmp.target) + { + LogDebug(" REL32 target is result of getFunctionEntryPoint(): setting delta=%d (0x%X)", + (int)tmp.target, (int)tmp.target); + delta = (INT64)(int)tmp.target; + deltaIsFinal = true; + break; // No need to consider the remaining GetFunctionEntryPoint entries + } + } + } + } + + if (!deltaIsFinal) + { + // If the relocation points to the RO-data section, we need to be careful that the relocation + // fits in 32-bits for both the baseline and diff compilations. To do that, we pretend the RO + // data section exists immediately after the current code section. + + if ((rc->originalRoDataAddress <= (size_t)target) && + ((size_t)target < rc->originalRoDataAddress + rc->roDataSize)) + { + size_t ro_section_offset = (size_t)target - rc->originalRoDataAddress; + size_t ro_section_fake_start = (size_t)-1; + + // Looks like the target is in the RO data section. 
+ if ((rc->originalHotCodeAddress <= (size_t)fixupLocation) && + ((size_t)fixupLocation < rc->originalHotCodeAddress + rc->hotCodeSize)) + { + // Fixup location is in the hot section + ro_section_fake_start = rc->originalHotCodeAddress + rc->hotCodeSize; + delta = (INT64)(ro_section_fake_start + ro_section_offset - baseAddr); + deltaIsFinal = true; + LogDebug(" REL32 hot code target is in RO data section: setting delta=%d (0x%X)", + (int)delta, (int)delta); + } + else if ((rc->originalColdCodeAddress <= (size_t)fixupLocation) && + ((size_t)fixupLocation < rc->originalColdCodeAddress + rc->coldCodeSize)) + { + // Fixup location is in the cold section + ro_section_fake_start = rc->originalColdCodeAddress + rc->coldCodeSize; + delta = (INT64)(ro_section_fake_start + ro_section_offset - baseAddr); + deltaIsFinal = true; + LogDebug(" REL32 cold code target is in RO data section: setting delta=%d (0x%X)", + (int)delta, (int)delta); + } + } + } + if (delta != (INT64)(int)delta) { // This isn't going to fit in a signed 32-bit address. Use something that will fit, @@ -905,7 +1039,8 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o target = (DWORDLONG)originalAddr + (DWORDLONG)blocksize1; INT64 newdelta = (INT64)(target - baseAddr); - LogDebug(" REL32 overflow. Mapping target to %016llX. Mapping delta: %016llX => %016llX", target, delta, newdelta); + LogDebug(" REL32 overflow. Mapping target to %016llX. Mapping delta: %016llX => %016llX", + target, delta, newdelta); delta = newdelta; } @@ -916,7 +1051,7 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o LogError("REL32 relocation overflows field! delta=0x%016llX", delta); } - if (targetArch == SPMI_TARGET_ARCHITECTURE_AMD64) + if ((targetArch == SPMI_TARGET_ARCHITECTURE_AMD64) && !deltaIsFinal) { // During an actual compile, recordRelocation() will be called before the compile // is actually finished, and it will write the relative offset into the fixupLocation. 
@@ -929,7 +1064,7 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o } // Write 32-bits into location - LogDebug(" fixupLoc-%016llX (@%p) : %08X => %08X", fixupLocation, address, *(DWORD*)address, delta); + LogDebug(" fixupLoc-%016llX (@%p) : %08X => %08X", fixupLocation, address, *(DWORD*)address, delta); *(DWORD*)address = (DWORD)delta; } diff --git a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h index 4c912d10b5a38..97331720e79ba 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/compileresult.h @@ -11,6 +11,8 @@ #include "lightweightmap.h" #include "agnostic.h" +class MethodContext; + // MemoryTracker: a very simple allocator and tracker of allocated memory, so it can be deleted when needed. class MemoryTracker { @@ -52,6 +54,21 @@ class MemoryTracker MemoryNode* m_pHead; }; +// Data we need to process relocations properly. 
+struct RelocContext +{ + MethodContext* mc; + size_t hotCodeAddress; + size_t hotCodeSize; + size_t coldCodeAddress; + size_t coldCodeSize; + size_t roDataAddress; + size_t roDataSize; + size_t originalHotCodeAddress; + size_t originalColdCodeAddress; + size_t originalRoDataAddress; +}; + class CompileResult { public: @@ -161,10 +178,10 @@ class CompileResult void recReportFatalError(CorJitResult result); void dmpReportFatalError(DWORD key, DWORD value); - void recRecordRelocation(void* location, void* target, WORD fRelocType, WORD slotNum, INT32 addlDelta); + void recRecordRelocation(void* location, void* target, uint16_t fRelocType, uint16_t slotNum, int32_t addlDelta); void dmpRecordRelocation(DWORD key, const Agnostic_RecordRelocation& value); - void repRecordRelocation(void* location, void* target, WORD fRelocType, WORD slotNum, INT32 addlDelta); - void applyRelocs(unsigned char* block1, ULONG blocksize1, void* originalAddr); + void repRecordRelocation(void* location, void* target, uint16_t fRelocType, uint16_t slotNum, int32_t addlDelta); + void applyRelocs(RelocContext* rc, unsigned char* block1, ULONG blocksize1, void* originalAddr); void recProcessName(const char* name); void dmpProcessName(DWORD key, DWORD value); diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp index e97dab1118bd4..7cca8b9adc482 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp @@ -2343,13 +2343,9 @@ void* MethodContext::repGetHelperFtn(CorInfoHelpFunc ftnNum, void** ppIndirectio // Return Value: // True if there is a helper function associated with the given target address; false otherwise. // -// Assumptions: -// Only the lower 32 bits of the method address are necessary to identify the method. 
-// // Notes: -// - See notes for fndGetFunctionEntryPoint for a more in-depth discussion of why we only match on the -// lower 32 bits of the target address. -// - This might not work correctly with method contexts recorded via NGen compilation. +// - This might not work correctly with method contexts recorded via NGen compilation; it doesn't compare +// the ppIndirection value. // bool MethodContext::fndGetHelperFtn(void* functionAddress, CorInfoHelpFunc* pResult) { @@ -2487,7 +2483,7 @@ void MethodContext::repGetFunctionEntryPoint(CORINFO_METHOD_HANDLE ftn, // handle associated with the given target address, it will be written to here. // // Return Value: -// True if there is a helper function associated with the given target address; false otherwise. +// True if there is a function associated with the given target address; false otherwise. // // Assumptions: // - The given method address does not point to a jump stub. diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h index 465b6e0ff9683..589d6d85b2c65 100644 --- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h +++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h @@ -66,6 +66,8 @@ static_assert((int)EXTRA_JIT_FLAGS::HAS_DYNAMIC_PROFILE == (int)CORJIT_FLAGS::Co class MethodContext { + friend class CompileResult; + public: MethodContext(); diff --git a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp index b9921e189de28..2d480ee776dc5 100644 --- a/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp +++ b/src/coreclr/tools/superpmi/superpmi/neardiffer.cpp @@ -1141,12 +1141,36 @@ bool NearDiffer::compare(MethodContext* mc, CompileResult* cr1, CompileResult* c coldCodeSize_2, roDataSize_2, xcptnsCount_2, flag_2, hotCodeBlock_2, coldCodeBlock_2, roDataBlock_2, orig_hotCodeBlock_2, orig_coldCodeBlock_2, orig_roDataBlock_2); - cr1->applyRelocs(hotCodeBlock_1, 
hotCodeSize_1, orig_hotCodeBlock_1); - cr2->applyRelocs(hotCodeBlock_2, hotCodeSize_2, orig_hotCodeBlock_2); - cr1->applyRelocs(coldCodeBlock_1, coldCodeSize_1, orig_coldCodeBlock_1); - cr2->applyRelocs(coldCodeBlock_2, coldCodeSize_2, orig_coldCodeBlock_2); - cr1->applyRelocs(roDataBlock_1, roDataSize_1, orig_roDataBlock_1); - cr2->applyRelocs(roDataBlock_2, roDataSize_2, orig_roDataBlock_2); + RelocContext rc; + rc.mc = mc; + + rc.hotCodeAddress = (size_t)hotCodeBlock_1; + rc.hotCodeSize = hotCodeSize_1; + rc.coldCodeAddress = (size_t)coldCodeBlock_1; + rc.coldCodeSize = coldCodeSize_1; + rc.roDataAddress = (size_t)roDataBlock_1; + rc.roDataSize = roDataSize_1; + rc.originalHotCodeAddress = (size_t)orig_hotCodeBlock_1; + rc.originalColdCodeAddress = (size_t)orig_coldCodeBlock_1; + rc.originalRoDataAddress = (size_t)orig_roDataBlock_1; + + cr1->applyRelocs(&rc, hotCodeBlock_1, hotCodeSize_1, orig_hotCodeBlock_1); + cr1->applyRelocs(&rc, coldCodeBlock_1, coldCodeSize_1, orig_coldCodeBlock_1); + cr1->applyRelocs(&rc, roDataBlock_1, roDataSize_1, orig_roDataBlock_1); + + rc.hotCodeAddress = (size_t)hotCodeBlock_2; + rc.hotCodeSize = hotCodeSize_2; + rc.coldCodeAddress = (size_t)coldCodeBlock_2; + rc.coldCodeSize = coldCodeSize_2; + rc.roDataAddress = (size_t)roDataBlock_2; + rc.roDataSize = roDataSize_2; + rc.originalHotCodeAddress = (size_t)orig_hotCodeBlock_2; + rc.originalColdCodeAddress = (size_t)orig_coldCodeBlock_2; + rc.originalRoDataAddress = (size_t)orig_roDataBlock_2; + + cr2->applyRelocs(&rc, hotCodeBlock_2, hotCodeSize_2, orig_hotCodeBlock_2); + cr2->applyRelocs(&rc, coldCodeBlock_2, coldCodeSize_2, orig_coldCodeBlock_2); + cr2->applyRelocs(&rc, roDataBlock_2, roDataSize_2, orig_roDataBlock_2); if (!compareCodeSection(mc, cr1, cr2, hotCodeBlock_1, hotCodeSize_1, roDataBlock_1, roDataSize_1, orig_hotCodeBlock_1, orig_roDataBlock_1, orig_coldCodeBlock_1, coldCodeSize_1,