Skip to content

Commit

Permalink
(0.41) AArch64: Add space for outgoing JNI argument to J9CInterpreter…
Browse files Browse the repository at this point in the history
…StackFrame

This commit adds outgoingArguments array to J9CInterpreterStackFrame
for AArch64 VM to avoid overwriting preservedGPRs and preservedFPRs when
calling out to jitReleaseVMAccess helper from JNI invocation sequence.

This commit also updates ARM64JNILinkage to store JNI method arguments
that are passed by stack to the area where the outgoingArguments array resides.

Adding outgoingArguments makes the size of J9CInterpreterStackFrame large
and some slots are not accessible with ldp/stp instructions
using sp as a base register. Thus, this commit also updates m4 macros
in arm64helpers.m4. They use the start of slots for saving JIT GPRs
as the base address and x16 as the base register.

Issue: #18149

Signed-off-by: Akira Saitoh <saiaki@jp.ibm.com>
  • Loading branch information
Akira1Saitoh committed Nov 20, 2023
1 parent a803f00 commit 7494620
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 89 deletions.
6 changes: 2 additions & 4 deletions runtime/codert_vm/arm64nathelp.m4
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,7 @@ define({OLD_DUAL_MODE_HELPER_NO_RETURN_VALUE},{

define({NEW_DUAL_MODE_HELPER},{
DECLARE_EXTERN(fast_$1)
START_PROC($1)
SAVE_FPLR
BEGIN_HELPER($1)
CALL_DIRECT(fast_$1)
cbz x0,.L_done_$1
ldr x30,JIT_GPR_SAVE_SLOT(30)
Expand All @@ -190,8 +189,7 @@ define({NEW_DUAL_MODE_HELPER},{

define({NEW_DUAL_MODE_HELPER_NO_RETURN_VALUE},{
DECLARE_EXTERN(fast_$1)
START_PROC($1)
SAVE_FPLR
BEGIN_HELPER($1)
CALL_DIRECT(fast_$1)
cbz x0,.L_done_$1
ldr x30,JIT_GPR_SAVE_SLOT(30)
Expand Down
48 changes: 12 additions & 36 deletions runtime/compiler/aarch64/codegen/ARM64JNILinkage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,6 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
{
const TR::ARM64LinkageProperties &properties = _systemLinkage->getProperties();
TR::ARM64MemoryArgument *pushToMemory = NULL;
TR::Register *argMemReg;
TR::Register *tempReg;
int32_t argIndex = 0;
int32_t numMemArgs = 0;
Expand Down Expand Up @@ -503,12 +502,18 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
if (numMemArgs > 0)
{
pushToMemory = new (trStackMemory()) TR::ARM64MemoryArgument[numMemArgs];

argMemReg = cg()->allocateRegister();
}

// align to 16-byte boundary
totalSize = (totalSize + 15) & (~15);
/*
* We have J9_INLINE_JNI_MAX_ARG_COUNT (32) slots in interpreter frame.
* If the size of the native stack used for passing parameters exceeds this size,
* it will overwrite preserved registers.
* The number of parameters is guaranteed to be <= J9_INLINE_JNI_MAX_ARG_COUNT
* when HasFixedFrameC_CallingConvention codegen flag is set.
*/
TR_ASSERT_FATAL(totalSize <= sizeof(UDATA) * J9_INLINE_JNI_MAX_ARG_COUNT, "totalSize must not be larger than J9_INLINE_JNI_MAX_ARG_COUNT slots");

numIntegerArgs = 0;
numFloatArgs = 0;
Expand All @@ -522,6 +527,7 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
#if defined(OSX)
sigCursor = (char *)sig;
#endif
TR::RealRegister *sp = cg()->machine()->getRealRegister(properties.getStackPointerRegister());

for (i = firstArgumentChild; i < callNode->getNumChildren(); i++)
{
Expand Down Expand Up @@ -611,7 +617,7 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
#else
#error Unsupported platform
#endif
getOutgoingArgumentMemRef(argMemReg, argOffset, argRegister, op, pushToMemory[argIndex++]);
getOutgoingArgumentMemRef(sp, argOffset, argRegister, op, pushToMemory[argIndex++]);
argOffset += offsetInc;
}
numIntegerArgs++;
Expand Down Expand Up @@ -672,7 +678,7 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen
#error Unsupported platform
#endif
}
getOutgoingArgumentMemRef(argMemReg, argOffset, argRegister, op, pushToMemory[argIndex++]);
getOutgoingArgumentMemRef(sp, argOffset, argRegister, op, pushToMemory[argIndex++]);
argOffset += offsetInc;
}
numFloatArgs++;
Expand Down Expand Up @@ -746,17 +752,12 @@ size_t J9::ARM64::JNILinkage::buildJNIArgs(TR::Node *callNode, TR::RegisterDepen

if (numMemArgs > 0)
{
TR::RealRegister *sp = cg()->machine()->getRealRegister(properties.getStackPointerRegister());
generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::subimmx, callNode, argMemReg, sp, totalSize);

for (argIndex = 0; argIndex < numMemArgs; argIndex++)
{
TR::Register *aReg = pushToMemory[argIndex].argRegister;
generateMemSrc1Instruction(cg(), pushToMemory[argIndex].opCode, callNode, pushToMemory[argIndex].argMemory, aReg);
cg()->stopUsingRegister(aReg);
}

cg()->stopUsingRegister(argMemReg);
}

return totalSize;
Expand Down Expand Up @@ -1058,20 +1059,7 @@ TR::Register *J9::ARM64::JNILinkage::buildDirectDispatch(TR::Node *callNode)
#endif
TR::RegisterDependencyConditions *deps = new (trHeapMemory()) TR::RegisterDependencyConditions(maxRegisters, maxPostRegisters, trMemory());

size_t spSize = buildJNIArgs(callNode, deps, passThread, passReceiver, killNonVolatileGPRs);
TR::RealRegister *sp = machine()->getRealRegister(_systemLinkage->getProperties().getStackPointerRegister());

if (spSize > 0)
{
if (constantIsUnsignedImm12(spSize))
{
generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::subimmx, callNode, sp, sp, spSize);
}
else
{
TR_ASSERT_FATAL(false, "Too many arguments.");
}
}
buildJNIArgs(callNode, deps, passThread, passReceiver, killNonVolatileGPRs);

TR::Register *returnRegister = getReturnRegisterFromDeps(callNode, deps);

Expand Down Expand Up @@ -1111,18 +1099,6 @@ TR::Register *J9::ARM64::JNILinkage::buildDirectDispatch(TR::Node *callNode)
TR::Instruction *callInstr = generateMethodDispatch(callNode, isJNIGCPoint, deps, targetAddress, x9Reg);
generateLabelInstruction(cg(), TR::InstOpCode::label, callNode, returnLabel, callInstr);

if (spSize > 0)
{
if (constantIsUnsignedImm12(spSize))
{
generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::addimmx, callNode, sp, sp, spSize);
}
else
{
TR_ASSERT_FATAL(false, "Too many arguments.");
}
}

if (dropVMAccess)
{
#ifdef J9VM_INTERP_ATOMIC_FREE_JNI
Expand Down
3 changes: 3 additions & 0 deletions runtime/compiler/aarch64/codegen/J9CodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ J9::ARM64::CodeGenerator::initialize()
{
comp->setOption(TR_EnableMonitorCacheLookup);
}

if (comp->fej9()->hasFixedFrameC_CallingConvention())
cg->setHasFixedFrameC_CallingConvention();
}

TR::Linkage *
Expand Down
2 changes: 1 addition & 1 deletion runtime/compiler/env/VMJ9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9662,7 +9662,7 @@ portLibCall_sysinfo_has_fixed_frame_C_calling_convention()

/* AArch64 */
#if defined(TR_HOST_ARM64) && defined(TR_TARGET_ARM64)
return false;
return true;
#endif

/* X86 */
Expand Down
114 changes: 66 additions & 48 deletions runtime/oti/arm64helpers.m4
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,23 @@ define({JIT_GPR_SAVE_OFFSET},{eval(J9TR_cframe_jitGPRs+(($1)*ALen))})
define({JIT_GPR_SAVE_SLOT},{[sp,{#}JIT_GPR_SAVE_OFFSET($1)]})
define({JIT_FPR_SAVE_OFFSET},{eval(J9TR_cframe_jitFPRs+(($1)*8))})
define({JIT_FPR_SAVE_SLOT},{[sp,{#}JIT_FPR_SAVE_OFFSET($1)]})
dnl The size of J9CInterpreterStackFrame is now larger than 504 bytes, so we cannot simply use sp as the base register for ldp/stp instructions
dnl because the offset would become too large and invalid for ldp/stp.
dnl Thus, we use sp + JIT_GPR_SAVE_OFFSET(0) as the base address for ldp/stp. We chose x16 as the base register.
dnl It is intra procedure call register and JIT does not use it, so it is safe to clobber x16.
define({JIT_GPR_SAVE_SLOT_X16},{[x16,{#}eval(($1)*ALen)]})
define({JIT_FPR_SAVE_SLOT_X16},{[x16,{#}eval(JIT_FPR_SAVE_OFFSET($1)-JIT_GPR_SAVE_OFFSET(0))]})

define({SAVE_FPLR},{stp x29,x30,JIT_GPR_SAVE_SLOT(29)})
dnl JIT_GPR_SAVE_OFFSET is too large to fit in ldp/stp instructions.
define({SAVE_FPLR},{
str x29,JIT_GPR_SAVE_SLOT(29)
str x30,JIT_GPR_SAVE_SLOT(30)
})

define({RESTORE_FPLR},{ldp x29,x30,JIT_GPR_SAVE_SLOT(29)})
define({RESTORE_FPLR},{
ldr x29,JIT_GPR_SAVE_SLOT(29)
ldr x30,JIT_GPR_SAVE_SLOT(30)
})

define({BEGIN_HELPER},{
START_PROC($1)
Expand All @@ -95,22 +108,23 @@ define({CALL_C_WITH_VMTHREAD},{
CALL_DIRECT($1)
})

dnl x16 is inter-procedure-call register
define({SAVE_C_VOLATILE_REGS},{
stp x0,x1,JIT_GPR_SAVE_SLOT(0)
stp x2,x3,JIT_GPR_SAVE_SLOT(2)
stp x4,x5,JIT_GPR_SAVE_SLOT(4)
stp x6,x7,JIT_GPR_SAVE_SLOT(6)
stp x8,x9,JIT_GPR_SAVE_SLOT(8)
stp x10,x11,JIT_GPR_SAVE_SLOT(10)
stp x12,x13,JIT_GPR_SAVE_SLOT(12)
stp x14,x15,JIT_GPR_SAVE_SLOT(14)
stp x16,x17,JIT_GPR_SAVE_SLOT(16)
str x18,JIT_GPR_SAVE_SLOT(18)
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
stp x0,x1,JIT_GPR_SAVE_SLOT_X16(0)
stp x2,x3,JIT_GPR_SAVE_SLOT_X16(2)
stp x4,x5,JIT_GPR_SAVE_SLOT_X16(4)
stp x6,x7,JIT_GPR_SAVE_SLOT_X16(6)
stp x8,x9,JIT_GPR_SAVE_SLOT_X16(8)
stp x10,x11,JIT_GPR_SAVE_SLOT_X16(10)
stp x12,x13,JIT_GPR_SAVE_SLOT_X16(12)
stp x14,x15,JIT_GPR_SAVE_SLOT_X16(14)
stp x17,x18,JIT_GPR_SAVE_SLOT_X16(17)
ifdef({METHOD_INVOCATION},{
stp d0,d1,JIT_FPR_SAVE_SLOT(0)
stp d2,d3,JIT_FPR_SAVE_SLOT(2)
stp d4,d5,JIT_FPR_SAVE_SLOT(4)
stp d6,d7,JIT_FPR_SAVE_SLOT(6)
stp d0,d1,JIT_FPR_SAVE_SLOT_X16(0)
stp d2,d3,JIT_FPR_SAVE_SLOT_X16(2)
stp d4,d5,JIT_FPR_SAVE_SLOT_X16(4)
stp d6,d7,JIT_FPR_SAVE_SLOT_X16(6)
},{ dnl METHOD_INVOCATION
add x15, sp, JIT_FPR_SAVE_OFFSET(0)
st1 {{v0.4s, v1.4s, v2.4s, v3.4s}}, [x15], #64
Expand All @@ -125,11 +139,12 @@ ifdef({METHOD_INVOCATION},{
})

define({RESTORE_C_VOLATILE_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
ifdef({METHOD_INVOCATION},{
ldp d0,d1,JIT_FPR_SAVE_SLOT(0)
ldp d2,d3,JIT_FPR_SAVE_SLOT(2)
ldp d4,d5,JIT_FPR_SAVE_SLOT(4)
ldp d6,d7,JIT_FPR_SAVE_SLOT(6)
ldp d0,d1,JIT_FPR_SAVE_SLOT_X16(0)
ldp d2,d3,JIT_FPR_SAVE_SLOT_X16(2)
ldp d4,d5,JIT_FPR_SAVE_SLOT_X16(4)
ldp d6,d7,JIT_FPR_SAVE_SLOT_X16(6)
},{ dnl METHOD_INVOCATION
add x15, sp, JIT_FPR_SAVE_OFFSET(0)
ld1 {{v0.4s, v1.4s, v2.4s, v3.4s}}, [x15], #64
Expand All @@ -141,37 +156,38 @@ ifdef({METHOD_INVOCATION},{
ld1 {{v24.4s, v25.4s, v26.4s, v27.4s}}, [x15], #64
ld1 {{v28.4s, v29.4s, v30.4s, v31.4s}}, [x15]
}) dnl METHOD_INVOCATION
ldp x0,x1,JIT_GPR_SAVE_SLOT(0)
ldp x2,x3,JIT_GPR_SAVE_SLOT(2)
ldp x4,x5,JIT_GPR_SAVE_SLOT(4)
ldp x6,x7,JIT_GPR_SAVE_SLOT(6)
ldp x8,x9,JIT_GPR_SAVE_SLOT(8)
ldp x10,x11,JIT_GPR_SAVE_SLOT(10)
ldp x12,x13,JIT_GPR_SAVE_SLOT(12)
ldp x14,x15,JIT_GPR_SAVE_SLOT(14)
ldp x16,x17,JIT_GPR_SAVE_SLOT(16)
ldr x18,JIT_GPR_SAVE_SLOT(18)
ldp x0,x1,JIT_GPR_SAVE_SLOT_X16(0)
ldp x2,x3,JIT_GPR_SAVE_SLOT_X16(2)
ldp x4,x5,JIT_GPR_SAVE_SLOT_X16(4)
ldp x6,x7,JIT_GPR_SAVE_SLOT_X16(6)
ldp x8,x9,JIT_GPR_SAVE_SLOT_X16(8)
ldp x10,x11,JIT_GPR_SAVE_SLOT_X16(10)
ldp x12,x13,JIT_GPR_SAVE_SLOT_X16(12)
ldp x14,x15,JIT_GPR_SAVE_SLOT_X16(14)
ldp x17,x18,JIT_GPR_SAVE_SLOT_X16(17)
})

dnl No need to save/restore d8-15 - the stack walker will never need to read
dnl or modify them (no preserved FPRs in the JIT private linkage).

define({SAVE_C_NONVOLATILE_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
str x19,JIT_GPR_SAVE_SLOT(19)
stp x20,x21,JIT_GPR_SAVE_SLOT(20)
stp x22,x23,JIT_GPR_SAVE_SLOT(22)
stp x24,x25,JIT_GPR_SAVE_SLOT(24)
stp x26,x27,JIT_GPR_SAVE_SLOT(26)
stp x28,x29,JIT_GPR_SAVE_SLOT(28)
stp x20,x21,JIT_GPR_SAVE_SLOT_X16(20)
stp x22,x23,JIT_GPR_SAVE_SLOT_X16(22)
stp x24,x25,JIT_GPR_SAVE_SLOT_X16(24)
stp x26,x27,JIT_GPR_SAVE_SLOT_X16(26)
stp x28,x29,JIT_GPR_SAVE_SLOT_X16(28)
})

define({RESTORE_C_NONVOLATILE_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
ldr x19,JIT_GPR_SAVE_SLOT(19)
ldp x20,x21,JIT_GPR_SAVE_SLOT(20)
ldp x22,x23,JIT_GPR_SAVE_SLOT(22)
ldp x24,x25,JIT_GPR_SAVE_SLOT(24)
ldp x26,x27,JIT_GPR_SAVE_SLOT(26)
ldp x28,x29,JIT_GPR_SAVE_SLOT(28)
ldp x20,x21,JIT_GPR_SAVE_SLOT_X16(20)
ldp x22,x23,JIT_GPR_SAVE_SLOT_X16(22)
ldp x24,x25,JIT_GPR_SAVE_SLOT_X16(24)
ldp x26,x27,JIT_GPR_SAVE_SLOT_X16(26)
ldp x28,x29,JIT_GPR_SAVE_SLOT_X16(28)
})

define({SAVE_ALL_REGS},{
Expand All @@ -185,21 +201,23 @@ define({RESTORE_ALL_REGS},{
})

define({SAVE_PRESERVED_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
str x18,JIT_GPR_SAVE_SLOT(18)
str x21,JIT_GPR_SAVE_SLOT(21)
stp x22,x23,JIT_GPR_SAVE_SLOT(22)
stp x24,x25,JIT_GPR_SAVE_SLOT(24)
stp x26,x27,JIT_GPR_SAVE_SLOT(26)
stp x28,x29,JIT_GPR_SAVE_SLOT(28)
stp x22,x23,JIT_GPR_SAVE_SLOT_X16(22)
stp x24,x25,JIT_GPR_SAVE_SLOT_X16(24)
stp x26,x27,JIT_GPR_SAVE_SLOT_X16(26)
stp x28,x29,JIT_GPR_SAVE_SLOT_X16(28)
})

define({RESTORE_PRESERVED_REGS},{
add x16, sp, JIT_GPR_SAVE_OFFSET(0)
ldr x18,JIT_GPR_SAVE_SLOT(18)
ldr x21,JIT_GPR_SAVE_SLOT(21)
ldp x22,x23,JIT_GPR_SAVE_SLOT(22)
ldp x24,x25,JIT_GPR_SAVE_SLOT(24)
ldp x26,x27,JIT_GPR_SAVE_SLOT(26)
ldp x28,x29,JIT_GPR_SAVE_SLOT(28)
ldp x22,x23,JIT_GPR_SAVE_SLOT_X16(22)
ldp x24,x25,JIT_GPR_SAVE_SLOT_X16(24)
ldp x26,x27,JIT_GPR_SAVE_SLOT_X16(26)
ldp x28,x29,JIT_GPR_SAVE_SLOT_X16(28)
})

define({BRANCH_VIA_VMTHREAD},{
Expand Down
1 change: 1 addition & 0 deletions runtime/oti/j9nonbuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -6287,6 +6287,7 @@ typedef struct J9CInterpreterStackFrame {
U_8 jitFPRs[16 * 8]; /* fpr0-15 */
#endif /* J9VM_ENV_DATA64 */
#elif defined(J9VM_ARCH_AARCH64) /* J9VM_ARCH_ARM */
UDATA outgoingArguments[J9_INLINE_JNI_MAX_ARG_COUNT];
UDATA preservedGPRs[12]; /* x19-x30 */
U_8 preservedFPRs[8 * 8]; /* v8-15 */
J9JITGPRSpillArea jitGPRs;
Expand Down

0 comments on commit 7494620

Please sign in to comment.