Skip to content

Commit

Permalink
Merge pull request #9249 from Akira1Saitoh/aarch64UnresolvedVirtualCa…
Browse files Browse the repository at this point in the history
…lllockv0.20.0

(0.20.0) AArch64: Make unresolved virtual call thread safe
  • Loading branch information
pshipton committed Apr 15, 2020
2 parents f46ee78 + b77489e commit 05fa2d3
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 36 deletions.
11 changes: 5 additions & 6 deletions runtime/compiler/aarch64/codegen/ARM64PrivateLinkage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1224,18 +1224,17 @@ void J9::ARM64::PrivateLinkage::buildVirtualDispatch(TR::Node *callNode,
TR::ARM64VirtualUnresolvedSnippet(cg(), callNode, vcSnippetLabel, argSize, doneLabel, (uint8_t *)thunk);
cg()->addSnippet(vcSnippet);

TR::Register *dstReg = cg()->allocateRegister();

// The following instructions are modified by _virtualUnresolvedHelper
// in aarch64/runtime/PicBuilder.spp to load the vTable index in x9
generateTrg1ImmInstruction(cg(), TR::InstOpCode::movzx, callNode, x9, 0);

// This `b` instruction is modified to `movzx x9, #imm`, where imm is the lower 16 bits of the offset
generateLabelInstruction(cg(), TR::InstOpCode::b, callNode, vcSnippetLabel);
generateTrg1ImmInstruction(cg(), TR::InstOpCode::movkx, callNode, x9, TR::MOV_LSL16);
generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::sbfmx, callNode, x9, x9, 0x1F); // sxtw x9, w9
tempMR = new (trHeapMemory()) TR::MemoryReference(vftReg, x9, cg());
generateTrg1MemInstruction(cg(), TR::InstOpCode::ldroffx, callNode, dstReg, tempMR);
gcPoint = generateLabelInstruction(cg(), TR::InstOpCode::b, callNode, vcSnippetLabel);

cg()->stopUsingRegister(dstReg);
generateTrg1MemInstruction(cg(), TR::InstOpCode::ldroffx, callNode, x9, tempMR);
gcPoint = generateRegBranchInstruction(cg(), TR::InstOpCode::blr, callNode, x9);
}
else
{
Expand Down
19 changes: 17 additions & 2 deletions runtime/compiler/aarch64/codegen/CallSnippet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,10 @@ uint8_t *TR::ARM64VirtualUnresolvedSnippet::emitSnippetBody()
TR_J2IVirtualThunkPointer, cg()),
__FILE__, __LINE__, callNode);

return cursor + 8;
cursor += 8;
// Lock word
*(int32_t *)cursor = 0;
return cursor + sizeof(int32_t);
}

void
Expand All @@ -499,11 +502,23 @@ TR_Debug::print(TR::FILE *pOutFile, TR::ARM64VirtualUnresolvedSnippet * snippet)

printPrefix(pOutFile, NULL, cursor, sizeof(intptr_t));
trfprintf(pOutFile, ".dword \t0x%08x\t\t; cpIndex", *(intptr_t *)cursor);
cursor += sizeof(intptr_t);

printPrefix(pOutFile, NULL, cursor, sizeof(intptr_t));
trfprintf(pOutFile, ".dword \t" POINTER_PRINTF_FORMAT "\t\t; Private J9Method pointer", *(intptr_t *)cursor);
cursor += sizeof(intptr_t);

printPrefix(pOutFile, NULL, cursor, sizeof(intptr_t));
trfprintf(pOutFile, ".dword \t" POINTER_PRINTF_FORMAT "\t\t; J2I thunk address for private", *(intptr_t *)cursor);
cursor += sizeof(intptr_t);

printPrefix(pOutFile, NULL, cursor, 4);
trfprintf(pOutFile, ".word \t0x%08x\t\t; Lock Word For Resolution", *(int32_t *)cursor);
}

// Reports the size of the virtual unresolved call snippet as a fixed byte count.
// The estimatedSnippetStart argument is not read.
// NOTE(review): two return statements appear here and, as written, only the first
// one (44) is reachable. emitSnippetBody now writes a 4-byte lock word after the
// existing snippet data, which matches 48 (= 44 + 4) -- confirm which value is intended.
uint32_t TR::ARM64VirtualUnresolvedSnippet::getLength(int32_t estimatedSnippetStart)
{
return 44;
return 48;
}

uint8_t *TR::ARM64InterfaceCallSnippet::emitSnippetBody()
Expand Down
65 changes: 37 additions & 28 deletions runtime/compiler/aarch64/runtime/PicBuilder.spp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
.set J9TR_UVCSnippet_CPIndex, 16
.set J9TR_UVCSnippet_method, 24
.set J9TR_UVCSnippet_J2IThunk, 32
.set J9TR_UVCSnippet_lockword, 40

// Unresolved data snippet

Expand Down Expand Up @@ -630,17 +631,16 @@ const_jitResolveConstantDynamic:
//
// in: x30 = snippet
//
// trash: x10, x11
// trash: x10, x11, x12

// For a virtual unresolved call, we generate the following instructions:
// movz x9, #0
// b VirtualUnresolvedSnippet ; change this to "movz x9, #low16bits"
// movk x9, #0, LSL #16
// sxtw x9, w9
// ldr dstReg, [vftReg, x9]
// b VirtualUnresolvedSnippet ; change this to "blr dstReg"
// ldr x9, [vftReg, x9]
// blr x9
//
// We encode the resolved index value into movz and movk instructions first
// Then the b instruction is changed to "blr dstReg"
// We encode the resolved index value (signed 32 bits) into movz and movk instructions
//
_virtualUnresolvedHelper:
stp x7, x6, [J9SP, #-64]! // save parameter regs. jitWalkResolveMethodFrame assumes that argument registers are saved in this order
Expand Down Expand Up @@ -680,38 +680,47 @@ L_calloutPrivate:
add J9SP, J9SP, #64
br x10 // Call the target, not returning here
L_callVirtual:
mov x2, x0
sub x0, x10, #20 // get the address of the movz instruction
ldr w1, [x0] // fetch the movz instruction
ubfx x3, x2, #0, #16 // lower 16 bits of the index
orr w1, w1, w3, LSL #5 // encode the index in the movz instruction
str w1, [x0] // store the movz instruction
ldr w1, [x0, #4] // fetch the movk instruction
ubfx x3, x2, #16, #16 // next 16 bits of the index
orr w1, w1, w3, LSL #5 // encode the index in the movk instruction
str w1, [x0, #4] // store the movk instruction
mov x1, #8 // 2 instruction to flush
add x1, x11, #J9TR_UVCSnippet_lockword // address of the lockword
mov w3, #1
ldxr w2, [x1]
cbnz w2, L_spinForUpdate // already locked by another thread
stxr w2, w3, [x1] // try to lock
cbnz w2, L_spinForUpdate // failed to lock
mov x12, x0 // resolved index
sub x0, x10, #16 // get the address of the movk instruction
ldr w1, [x0] // fetch the movk instruction
ubfx x2, x12, #16, #16 // upper 16 bits of the index
orr w1, w1, w2, LSL #5 // encode the index in the movk instruction
str w1, [x0] // store the movk instruction
mov x1, #4 // 1 instruction to flush
bl flushICache
sub x0, x10, #8 // get the address of the ldr instruction
ldr w2, [x0] // fetch the ldr instruction
ubfx x2, x2, #0, #5 // extract the dstReg
ldr w3, const_blr // fetch constant for the blr instruction
orr w3, w3, w2, LSL #5 // encode the dstReg in the blr instruction
sub x0, x10, #4 // get the address of the b instruction
str w3, [x0] // store instruction
ldr w1, const_movz_x9 // fetch movz template
ubfx x12, x12, #0, #16 // lower 16 bits of the index
sub x0, x10, #20 // get the address of the b instruction
orr w1, w1, w12, LSL #5 // encode the index in the movz instruction
str w1, [x0] // store the movz instruction
mov x1, #4 // 1 instruction to flush
bl flushICache
sub x30, x10, #20 // set the movk instruction as the destination
mov w0, #0
dmb sy
str w0, [x11, #J9TR_UVCSnippet_lockword] // unlock
L_calloutVirtual:
sub x30, x10, #20 // set the movz instruction as the destination
ldp x7, x6, [J9SP, #0] // restore other parameter regs
ldp x5, x4, [J9SP, #16]
ldp x3, x2, [J9SP, #32]
ldp x1, x0, [J9SP, #48]
add J9SP, J9SP, #64
ret // jump back to the movk instruction
ret // jump back to the movz instruction

L_spinForUpdate:
// Entered from L_callVirtual when the lock word was already nonzero or the
// store-exclusive did not succeed. x1 still holds the lock word address here.
ldr w2, [x1] // re-read the lock word
cbnz w2, L_spinForUpdate // keep polling while it is nonzero (still locked)
b L_calloutVirtual // another thread completed rewriting instructions

.align 2
const_blr:
.word 0xD63F0000
const_movz_x9:
.word 0xD2800009 // template for "movz x9, #0"

// Handles calls to interface call snippets
//
Expand Down

0 comments on commit 05fa2d3

Please sign in to comment.