Skip to content

Commit

Permalink
Merge pull request #4604 from jdmpapin/pic-data-alignment
Browse files Browse the repository at this point in the history
Align mutable fields of x86 PIC data
  • Loading branch information
andrewcraik committed Feb 14, 2019
2 parents 35c44ea + 3a6ddd3 commit b1cb17c
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 316 deletions.
2 changes: 0 additions & 2 deletions runtime/compiler/runtime/Runtime.cpp
Expand Up @@ -387,7 +387,6 @@ JIT_HELPER(encodeUTF16Big);
JIT_HELPER(encodeUTF16Little);

JIT_HELPER(SMPVPicInit);
JIT_HELPER(resolveAndPopulateVTableDispatch);
JIT_HELPER(interpreterEAXStaticGlue);
JIT_HELPER(interpreterEDXEAXStaticGlue);
JIT_HELPER(interpreterST0FStaticGlue);
Expand Down Expand Up @@ -1258,7 +1257,6 @@ void initializeCodeRuntimeHelperTable(J9JITConfig *jitConfig, char isSMP)
SET(TR_IA32encodeUTF16Little, (void *)encodeUTF16Little, TR_Helper);

SET(TR_jitAddPicToPatchOnClassUnload, (void *)jitAddPicToPatchOnClassUnload, TR_Helper);
SET(TR_IA32interpreterUnresolvedVTableSlotGlue, (void *)resolveAndPopulateVTableDispatch, TR_Helper);

SET(TR_IA32JitMonitorEnterReserved, (void *)jitMonitorEnterReserved, TR_CHelper);
SET(TR_IA32JitMonitorEnterReservedPrimitive, (void *)jitMonitorEnterReservedPrimitive, TR_CHelper);
Expand Down
230 changes: 91 additions & 139 deletions runtime/compiler/x/codegen/CallSnippet.cpp
Expand Up @@ -39,6 +39,10 @@
#include "il/symbol/StaticSymbol.hpp"
#include "x/codegen/X86PrivateLinkage.hpp"

// TODO: Delete this forward declaration along with the corresponding
// TR_Debug::print overload.
namespace TR { class X86UnresolvedVirtualCallSnippet; }

bool TR::X86PicDataSnippet::shouldEmitJ2IThunkPointer()
{
if (!TR::Compiler->target.is64Bit())
Expand Down Expand Up @@ -162,7 +166,17 @@ uint8_t *TR::X86PicDataSnippet::emitSnippetBody()
//
// Slow interface lookup dispatch.
//
getSnippetLabel()->setCodeLocation(startOfSnippet);

// Align the IPIC data to a pointer-sized boundary to ensure that the
// interface class and itable offset are naturally aligned.
uintptr_t offsetToIpicData = 10;
uintptr_t unalignedIpicDataStart = (uintptr_t)cursor + offsetToIpicData;
uintptr_t alignMask = sizeof (uintptrj_t) - 1;
uintptr_t alignedIpicDataStart =
(unalignedIpicDataStart + alignMask) & ~alignMask;
cursor += alignedIpicDataStart - unalignedIpicDataStart;

getSnippetLabel()->setCodeLocation(cursor);

// Slow path lookup dispatch
//
Expand Down Expand Up @@ -202,6 +216,14 @@ uint8_t *TR::X86PicDataSnippet::emitSnippetBody()
TR_ASSERT_FATAL(0, "Can't handle resolved IPICs here yet!");
}

// Because the interface class and itable offset (immediately following)
// are written at runtime and might be read concurrently by another
// thread, they must be naturally aligned to guarantee that all accesses
// to them are atomic.
TR_ASSERT_FATAL(
((uintptr_t)cursor & (sizeof(uintptrj_t) - 1)) == 0,
"interface class and itable offset IPIC data slots are unaligned");

// Reserve space for resolved interface class and itable offset.
// These slots will be populated during interface class resolution.
// The itable offset slot doubles as a direct J9Method pointer slot.
Expand Down Expand Up @@ -266,20 +288,12 @@ uint8_t *TR::X86PicDataSnippet::emitSnippetBody()

cursor += (requiredEntryPoint - entryPoint);

// DD/DQ cpAddr
// DD/DQ cpIndex
//
cursor = encodeConstantPoolInfo(cursor);

// DD/DQ directMethod (initially null)
*(uintptrj_t *)cursor = 0;
cursor += sizeof(uintptrj_t);

// Put the narrow integers before the pointer-sized ones. This way,
// directMethod (which is mutable) will be aligned simply as a
// consequence of the alignment required for patching the code that
// immediately follows the VPIC data.
if (TR::Compiler->target.is64Bit())
{
// DD/DQ j2iThunk
cursor = encodeJ2IThunkPointer(cursor);

// REX prefix of MOVRegImm64 instruction
//
uint8_t *slotPatchInstructionBytes = (uint8_t *)_slotPatchInstruction->getBinaryEncoding();
Expand All @@ -298,9 +312,6 @@ uint8_t *TR::X86PicDataSnippet::emitSnippetBody()
slotPatchInstructionBytes += 11;
callModRMByte = (*slotPatchInstructionBytes & 7) + 0x90;
*cursor++ = callModRMByte;

// SIB 49ff9424feffffff call qword ptr [r12-2]

}
else
{
Expand All @@ -309,6 +320,28 @@ uint8_t *TR::X86PicDataSnippet::emitSnippetBody()
uint8_t *slotPatchInstructionBytes = (uint8_t *)_slotPatchInstruction->getBinaryEncoding();
*cursor++ = *(slotPatchInstructionBytes+1);
}

// DD/DQ cpAddr
// DD/DQ cpIndex
//
cursor = encodeConstantPoolInfo(cursor);

// Because directMethod (immediately following) is written at runtime
// and might be read concurrently by another thread, it must be
// naturally aligned to ensure that all accesses to it are atomic.
TR_ASSERT_FATAL(
((uintptr_t)cursor & (sizeof(uintptrj_t) - 1)) == 0,
"directMethod VPIC data slot is unaligned");

// DD/DQ directMethod (initially null)
*(uintptrj_t *)cursor = 0;
cursor += sizeof(uintptrj_t);

if (TR::Compiler->target.is64Bit())
{
// DD/DQ j2iThunk
cursor = encodeJ2IThunkPointer(cursor);
}
}
else
{
Expand Down Expand Up @@ -571,43 +604,8 @@ TR_Debug::print(TR::FILE *pOutFile, TR::X86PicDataSnippet *snippet)
{
const char *op = (sizeof(uintptrj_t) == 4) ? "DD" : "DQ";

printPrefix(pOutFile, NULL, bufferPos, sizeof(uintptrj_t));
trfprintf(
pOutFile,
"%s\t" POINTER_PRINTF_FORMAT "\t\t%s owning method cpAddr",
op,
(void*)*(uintptrj_t*)bufferPos,
commentString());
bufferPos += sizeof(uintptrj_t);

printPrefix(pOutFile, NULL, bufferPos, sizeof(uintptrj_t));
trfprintf(
pOutFile,
"%s\t" POINTER_PRINTF_FORMAT "\t\t%s cpIndex",
op,
(void*)*(uintptrj_t*)bufferPos,
commentString());
bufferPos += sizeof(uintptrj_t);

printPrefix(pOutFile, NULL, bufferPos, sizeof(uintptrj_t));
trfprintf(pOutFile,
"%s\t" POINTER_PRINTF_FORMAT "\t\t%s direct J9Method (initially null)",
op,
(void*)*(uintptrj_t*)bufferPos,
commentString());
bufferPos += sizeof(uintptrj_t);

if (TR::Compiler->target.is64Bit())
{
printPrefix(pOutFile, NULL, bufferPos, sizeof(uintptrj_t));
trfprintf(
pOutFile,
"%s\t" POINTER_PRINTF_FORMAT "\t\t%s j2i virtual thunk",
op,
(void*)*(uintptrj_t*)bufferPos,
commentString());
bufferPos += sizeof(uintptrj_t);

printPrefix(pOutFile, NULL, bufferPos, 1);
trfprintf(pOutFile, "%s\t%02x\t\t\t\t\t\t\t\t%s REX of MOVRegImm64",
dbString(),
Expand Down Expand Up @@ -646,6 +644,44 @@ TR_Debug::print(TR::FILE *pOutFile, TR::X86PicDataSnippet *snippet)
commentString());
bufferPos += 1;
}

printPrefix(pOutFile, NULL, bufferPos, sizeof(uintptrj_t));
trfprintf(
pOutFile,
"%s\t" POINTER_PRINTF_FORMAT "\t\t%s owning method cpAddr",
op,
(void*)*(uintptrj_t*)bufferPos,
commentString());
bufferPos += sizeof(uintptrj_t);

printPrefix(pOutFile, NULL, bufferPos, sizeof(uintptrj_t));
trfprintf(
pOutFile,
"%s\t" POINTER_PRINTF_FORMAT "\t\t%s cpIndex",
op,
(void*)*(uintptrj_t*)bufferPos,
commentString());
bufferPos += sizeof(uintptrj_t);

printPrefix(pOutFile, NULL, bufferPos, sizeof(uintptrj_t));
trfprintf(pOutFile,
"%s\t" POINTER_PRINTF_FORMAT "\t\t%s direct J9Method (initially null)",
op,
(void*)*(uintptrj_t*)bufferPos,
commentString());
bufferPos += sizeof(uintptrj_t);

if (TR::Compiler->target.is64Bit())
{
printPrefix(pOutFile, NULL, bufferPos, sizeof(uintptrj_t));
trfprintf(
pOutFile,
"%s\t" POINTER_PRINTF_FORMAT "\t\t%s j2i virtual thunk",
op,
(void*)*(uintptrj_t*)bufferPos,
commentString());
bufferPos += sizeof(uintptrj_t);
}
}

if (TR::Compiler->target.is64Bit())
Expand Down Expand Up @@ -692,7 +728,8 @@ uint32_t TR::X86PicDataSnippet::getLength(int32_t estimatedSnippetStart)
+ 5 // JMP done
+ (4 * sizeof(uintptrj_t)) // Resolve slots
+ (TR::Compiler->target.is64Bit() ? 2 : 1) // ModRM or REX+MOV
+ (_hasJ2IThunkInPicData ? sizeof(uintptrj_t) : 0); // j2i thunk pointer
+ (_hasJ2IThunkInPicData ? sizeof(uintptrj_t) : 0) // j2i thunk pointer
+ sizeof (uintptrj_t) - 1; // alignment
}
else
{
Expand Down Expand Up @@ -1098,94 +1135,9 @@ uint32_t TR::X86CallSnippet::getLength(int32_t estimatedSnippetStart)
return length;
}


// Emits the body of an unresolved virtual call snippet at the code generator's
// current binary buffer cursor and returns the cursor just past the last byte
// written.
//
// Bytes laid down, in order:
//   1 byte   0x52              PUSH edx
//   5 bytes  0xe8 + rel32      CALL to the TR_IA32interpreterUnresolvedVTableSlotGlue helper
//   N bytes  cpAddr            constant pool of the owning method (N = sizeof(intptrj_t))
//   N bytes  cpIndex           constant pool index of the method being called
//   2 bytes                    copy of the first two bytes of the original call instruction
//
// Side effects: registers two external relocations (helper address and
// constant pool), registers a GC stack map at the helper call's return
// address, and overwrites the first five bytes of the original call
// instruction with a CALL to this snippet.
//
// Only valid when the target is 32-bit (asserted below).
uint8_t *TR::X86UnresolvedVirtualCallSnippet::emitSnippetBody()
{
TR_ASSERT(TR::Compiler->target.is32Bit(), "TR::X86UnresolvedVirtualCallSnippet only available on 32-bit");
TR::Compilation *comp = cg()->comp();
uint8_t *cursor = cg()->getBinaryBufferCursor();
// The snippet label resolves to the first byte emitted here.
getSnippetLabel()->setCodeLocation(cursor);

// Preserve edx
//
*cursor++ = 0x52; // PUSH edx to preserve

// Call the runtime helper
//
*cursor++ = 0xe8; // CALL resolveAndPopulateVTableDispatch

// Look up the helper and record a relocation against the 4-byte
// displacement that is about to be written at cursor.
TR::SymbolReference *glueSymRef =
cg()->symRefTab()->findOrCreateRuntimeHelper(TR_IA32interpreterUnresolvedVTableSlotGlue, false, false, false);
uint8_t *glueAddress = (uint8_t *)glueSymRef->getMethodAddress();
cg()->addExternalRelocation(new (comp->trHeapMemory()) TR::ExternalRelocation(cursor,
(uint8_t *)glueSymRef,
TR_HelperAddress,
cg()),
__FILE__, __LINE__, getNode());

// The displacement is relative to the end of the 4-byte immediate, hence
// the extra "- 4".
*(int32_t *)cursor = (int32_t)((uint8_t *)glueAddress - cursor - 4);
cursor += 4;

// A stack map is needed at this location because the helper called above
// provides this address as the return address in this frame.
gcMap().registerStackMap(cursor, cg());

// Lay down constant pool and cpindex for jitResolveVirtualMethod helper to use
//
uintptrj_t cpAddr = (uintptrj_t)_methodSymRef->getOwningMethod(comp)->constantPool();
*(intptrj_t *)cursor = cpAddr;
// The constant pool address is relocatable; the relocation also carries the
// node's inlined site index, or -1 when this snippet has no node.
cg()->addExternalRelocation(
new (cg()->trHeapMemory()) TR::ExternalRelocation(cursor,
(uint8_t *)cpAddr,
getNode() ? (uint8_t *)(uintptr_t)getNode()->getInlinedSiteIndex() : (uint8_t *)-1,
TR_ConstantPool,
cg()),
__FILE__, __LINE__, getNode());


cursor += sizeof(intptrj_t);
*(uintptrj_t *)cursor = (uintptrj_t)_methodSymRef->getCPIndexForVM();
cursor += sizeof(intptrj_t);

// Squirrel away the first two encoded bytes of the original call instruction.
//
uint8_t *callInstruction = _callInstruction->getBinaryEncoding();
*cursor++ = *(callInstruction);
*cursor++ = *(callInstruction+1);

// Write a call to this snippet over the original call instruction.
// The target is the buffer cursor as reported by the code generator, which
// is the value this function started from.
// NOTE(review): this assumes cg()->getBinaryBufferCursor() has not changed
// since the top of this function - confirm.
//
*callInstruction = 0xe8; // CALLImm4
*(uint32_t *)(callInstruction+1) = (uint32_t)(cg()->getBinaryBufferCursor() - (callInstruction + 5));

return cursor;
}


// TODO: Delete this once the (dead) call site is deleted from omr.
// Debug printer for an unresolved virtual call snippet.
//
// Does nothing when pOutFile is NULL. Otherwise prints the snippet's label
// and then a single placeholder line that spans the snippet's whole length;
// the individual bytes of the snippet are not decoded.
void
TR_Debug::print(TR::FILE *pOutFile, TR::X86UnresolvedVirtualCallSnippet *snippet)
{
if (pOutFile == NULL)
return;

// Address at which the snippet's label was placed.
uint8_t *bufferPos = snippet->getSnippetLabel()->getCodeLocation();

printSnippetLabel(pOutFile, snippet->getSnippetLabel(), bufferPos, getName(snippet));
// getLength() takes the snippet start as an integer, so the pointer is
// converted by subtracting a null pointer of the same type.
printPrefix(pOutFile, NULL, bufferPos, snippet->getLength(bufferPos - (uint8_t*)0));
trfprintf(pOutFile, "\t\t\t\t%s mysterious new unresolved virtual call snippet code",
commentString());
return;

}



// Returns the size in bytes of the unresolved virtual call snippet as the sum
// of its fixed-size parts (16 bytes in total). estimatedSnippetStart is not
// used.
//
// NOTE(review): as written here, the TR_ASSERT_FATAL that follows the return
// statement can never execute. This text looks like a rendered diff in which
// removed and added lines are shown together - confirm against the real file
// which of the two statements is meant to remain.
uint32_t TR::X86UnresolvedVirtualCallSnippet::getLength(int32_t estimatedSnippetStart)
{
return
1 // push EDX
+ 5 // CALL resolveAndPopulateVTableDispatch
+ 4 // cpAddr
+ 4 // cpIndex
+ 2; // CALL opcode + modRM
TR_ASSERT_FATAL(false, "stub for staged deletion");
}
31 changes: 1 addition & 30 deletions runtime/compiler/x/codegen/CallSnippet.hpp
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2000, 2018 IBM Corp. and others
* Copyright (c) 2000, 2019 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -102,35 +102,6 @@ class X86PicDataSnippet : public TR::Snippet
bool shouldEmitJ2IThunkPointer();
};


// Snippet associated with a call instruction whose virtual method is still
// unresolved. It remembers the method's symbol reference and the call
// instruction it was created for.
class X86UnresolvedVirtualCallSnippet : public TR::UnresolvedDataSnippet
{
// Symbol reference of the method being called.
TR::SymbolReference *_methodSymRef;
// The call instruction this snippet belongs to.
TR::Instruction *_callInstruction;

public:

// Builds the snippet for callInstruction. The base class is given the node's
// own symbol reference; callInstruction is also recorded as the data
// reference instruction.
// NOTE(review): the meaning of the two boolean arguments passed to the base
// constructor (false, true) is not visible here - confirm against
// TR::UnresolvedDataSnippet.
X86UnresolvedVirtualCallSnippet(
TR::Node *node,
TR::SymbolReference *methodSymRef,
TR::Instruction *callInstruction,
TR::CodeGenerator *cg) :
TR::UnresolvedDataSnippet(cg, node, node->getSymbolReference(), false, true),
_methodSymRef(methodSymRef),
_callInstruction(callInstruction)
{ setDataReferenceInstruction(callInstruction); }

// Symbol reference of the method being called, as given to the constructor.
TR::SymbolReference *getMethodSymRef() {return _methodSymRef;}

// Identifies this snippet as an unresolved virtual call snippet.
virtual Kind getKind() { return (IsUnresolvedVirtualCall); }

// Writes the snippet's bytes; returns the cursor past the last byte written.
virtual uint8_t *emitSnippetBody();

// Size of the snippet in bytes.
virtual uint32_t getLength(int32_t estimatedSnippetStart);

};


class X86CallSnippet : public TR::Snippet
{
public:
Expand Down
22 changes: 4 additions & 18 deletions runtime/compiler/x/codegen/X86PrivateLinkage.cpp
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2000, 2018 IBM Corp. and others
* Copyright (c) 2000, 2019 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -2169,23 +2169,9 @@ TR::Instruction *TR::X86PrivateLinkage::buildVFTCall(TR::X86CallSite &site, TR_X

callInstr->setNeedsGCMap(site.getPreservedRegisterMask());

if (site.getSymbolReference()->isUnresolved() && !site.getMethodSymbol()->isInterface())
{
generateBoundaryAvoidanceInstruction(
TR::X86BoundaryAvoidanceInstruction::unresolvedAtomicRegions, 8, 8, callInstr, cg());

TR::LabelSymbol *snippetLabel = TR::LabelSymbol::create(cg()->trHeapMemory(),cg());
TR::UnresolvedDataSnippet *snippet = new (comp()->trHeapMemory()) TR::X86UnresolvedVirtualCallSnippet(
callNode,
site.getSymbolReference(),
callInstr,
cg());

// Need to do this so that stack map registered inside the snippet
targetAddressMemref->setUnresolvedDataSnippet(snippet);
snippet->gcMap().setGCRegisterMask(site.getPreservedRegisterMask());
cg()->addSnippet(snippet);
}
TR_ASSERT_FATAL(
!site.getSymbolReference()->isUnresolved() || site.getMethodSymbol()->isInterface(),
"buildVFTCall: unresolved virtual site");

if (cg()->enableSinglePrecisionMethods() &&
comp()->getJittedMethodSymbol()->usesSinglePrecisionMode())
Expand Down

0 comments on commit b1cb17c

Please sign in to comment.