diff --git a/runtime/compiler/control/rossa.cpp b/runtime/compiler/control/rossa.cpp index 7760db565af..55fa843ccd4 100644 --- a/runtime/compiler/control/rossa.cpp +++ b/runtime/compiler/control/rossa.cpp @@ -1310,6 +1310,8 @@ onLoadInternal( // Now that the options have been processed we can initialize the RuntimeAssumptionTables // If we cannot allocate various runtime assumption hash tables, fail the JVM + fe->initializeSystemProperties(); + // Allocate trampolines for z/OS 64-bit #if defined(J9ZOS390) if (TR::Options::getCmdLineOptions()->getOption(TR_EnableRMODE64) && !isQuickstart) @@ -1467,8 +1469,6 @@ onLoadInternal( jitConfig->thunkLookUpNameAndSig = &j9ThunkLookupNameAndSig; - fe->initializeSystemProperties(); - TR::CompilationInfo * compInfo = TR::CompilationInfo::get(); // Now that we have all options (and before starting the compilation thread) we diff --git a/runtime/compiler/p/codegen/Trampoline.cpp b/runtime/compiler/p/codegen/Trampoline.cpp index ae6bd40c4d7..dd4b8673a8d 100644 --- a/runtime/compiler/p/codegen/Trampoline.cpp +++ b/runtime/compiler/p/codegen/Trampoline.cpp @@ -34,24 +34,11 @@ namespace TR { class PersistentInfo; } -#if defined(TR_TARGET_64BIT) -#define TRAMPOLINE_SIZE 28 -#define OFFSET_IPIC_TO_CALL 36 -#else -#define TRAMPOLINE_SIZE 16 -#define OFFSET_IPIC_TO_CALL 32 -#endif - extern "C" { extern int __j9_smp_flag; - int32_t ppcTrampolineInitByCodeCache(TR_FrontEnd *, uint8_t *, uintptr_t); }; -#ifdef TR_HOST_POWER -extern void ppcCodeSync(uint8_t *, uint32_t); -#endif - void * ppcPicTrampInit(TR_FrontEnd *vm, TR::PersistentInfo * persistentInfo) { void *retVal = 0; @@ -65,7 +52,8 @@ void * ppcPicTrampInit(TR_FrontEnd *vm, TR::PersistentInfo * persistentInfo) #ifdef TR_TARGET_64BIT TR_J9VMBase *fej9 = (TR_J9VMBase *)vm; - if (!fej9->isAOT_DEPRECATED_DO_NOT_USE()) // don't init TOC if it is jar2jxe AOT compile + if (!fej9->isAOT_DEPRECATED_DO_NOT_USE() && // don't init TOC if it is jar2jxe AOT compile + 
!TR::Options::getCmdLineOptions()->getOption(TR_DisableTOC)) { retVal = TR_PPCTableOfConstants::initTOC(fej9, persistentInfo, 0); } diff --git a/runtime/compiler/runtime/Trampoline.cpp b/runtime/compiler/runtime/Trampoline.cpp index 66d1d56d016..af81bd580ee 100644 --- a/runtime/compiler/runtime/Trampoline.cpp +++ b/runtime/compiler/runtime/Trampoline.cpp @@ -36,11 +36,14 @@ #if defined(TR_TARGET_POWER) +// Target address prediction is based on 32-byte blocks on POWER +// we adjust the trampoline size to align with this block-size +// regardless of 32-bit or 64-bit. +#define TRAMPOLINE_SIZE 32 + #if defined(TR_TARGET_64BIT) -#define TRAMPOLINE_SIZE 28 #define OFFSET_IPIC_TO_CALL 36 #else -#define TRAMPOLINE_SIZE 16 #define OFFSET_IPIC_TO_CALL 32 #endif @@ -52,13 +55,12 @@ void ppcCodeCacheConfig(int32_t ccSizeInByte, int32_t *numTempTrampolines) { // Estimated: 2KB per method, with 10% being recompiled(multi-times) - *numTempTrampolines = ccSizeInByte>>12; + *numTempTrampolines = TR::Compiler->target.cpu.isAtLeast(OMR_PROCESSOR_PPC_P10) ? 0 : (ccSizeInByte>>13); } void ppcCreateHelperTrampolines(uint8_t *trampPtr, int32_t numHelpers) { TR::CodeCacheConfig &config = TR::CodeCacheManager::instance()->codeCacheConfig(); - static bool customP4 = feGetEnv("TR_CustomP4Trampoline") ? 
true : false; uint8_t *bufferStart = trampPtr, *buffer; for (int32_t cookie=1; cookietarget.cpu.isAtLeast(OMR_PROCESSOR_PPC_P10)) + { + if (!TR::Options::getCmdLineOptions()->getOption(TR_DisableTOC)) + { + // ld gr11, [grPTOC, 8*(cookie-1)] + *(int32_t *)buffer = 0xe9700000 | (((cookie-1)*sizeof(intptr_t)) & 0x0000ffff); + buffer += 4; + } + else + { + // only gr11 is available for helper dispatch + + // lis gr11, upper 16-bits + *(int32_t *)buffer = 0x3d600000 | ((helper>>48) & 0x0000ffff); + buffer += 4; + + // oris gr11, gr11, bits 16--31 + *(int32_t *)buffer = 0x656b0000 | ((helper>>32) & 0x0000ffff); + buffer += 4; + + // rldicr gr11, gr11, 32, 31 + *(int32_t *)buffer = 0x796b07c6; + buffer += 4; + + // oris gr11, gr11, bits 32-47 + *(int32_t *)buffer = 0x656b0000 | ((helper>>16) & 0x0000ffff); + buffer += 4; + + // ori gr11, gr11, bits 48--63 + *(int32_t *)buffer = 0x616b0000 | (helper & 0x0000ffff); + buffer += 4; + } + } + else + { + // pld gr11, [,16], 1 (PC-relative) + *(int32_t *)buffer = 0x04100000; + buffer += 4; + *(int32_t *)buffer = 0xe5600010; buffer += 4; + } #else + if (!TR::Compiler->target.cpu.isAtLeast(OMR_PROCESSOR_PPC_P10)) + { // For POWER4 which has a problem with the CTR/LR cache when the upper // bits are not 0 extended.. 
Use li/oris when the 16th bit is off - if (!(helper & 0x00008000) ) + if (!(helper & 0x00008000)) { // li r11, lower *(int32_t *)buffer = 0x39600000 | (helper & 0x0000ffff); @@ -95,16 +137,22 @@ void ppcCreateHelperTrampolines(uint8_t *trampPtr, int32_t numHelpers) *(int32_t *)buffer = 0x396b0000 | (helper & 0x0000ffff); buffer += 4; - // Now, if highest bit is on we need to clear the sign extend bits on 64bit CPUs - // ** POWER4 pref fix ** - if ((helper & 0x80000000) && (!customP4 || TR::comp()->target().cpu.is(OMR_PROCESSOR_PPC_GP))) + if (helper & 0x80000000) { // rlwinm r11,r11,sh=0,mb=0,me=31 *(int32_t *)buffer = 0x556b003e; buffer += 4; } } - + } + else + { + // plwz gr11, [,16], 1 (PC-relative) + *(int32_t *)buffer = 0x06100000; + buffer += 4; + *(int32_t *)buffer = 0x81600010; + buffer += 4; + } #endif // mtctr r11 @@ -114,46 +162,61 @@ void ppcCreateHelperTrampolines(uint8_t *trampPtr, int32_t numHelpers) // bctr *(int32_t *)buffer = 0x4e800420; buffer += 4; - } + + if (TR::Compiler->target.cpu.isAtLeast(OMR_PROCESSOR_PPC_P10)) + { + *(intptr_t *)buffer = helper; + } + } + #ifdef TR_HOST_POWER ppcCodeSync(trampPtr, config.trampolineCodeSize() * numHelpers); #endif - } void ppcCreateMethodTrampoline(void *trampPtr, void *startPC, void *method) { - static bool customP4 = feGetEnv("TR_CustomP4Trampoline") ? true : false; uint8_t *buffer = (uint8_t *)trampPtr; J9::PrivateLinkage::LinkageInfo *linkInfo = J9::PrivateLinkage::LinkageInfo::get(startPC); intptr_t dispatcher = (intptr_t)((uint8_t *)startPC + linkInfo->getReservedWord()); // Take advantage of both gr0 and gr11 ... 
#if defined(TR_TARGET_64BIT) - // lis gr0, upper 16-bits - *(int32_t *)buffer = 0x3c000000 | ((dispatcher>>48) & 0x0000ffff); - buffer += 4; + if (!TR::Compiler->target.cpu.isAtLeast(OMR_PROCESSOR_PPC_P10)) + { + // lis gr0, upper 16-bits + *(int32_t *)buffer = 0x3c000000 | ((dispatcher>>48) & 0x0000ffff); + buffer += 4; - // lis gr11, bits 32--47 - *(int32_t *)buffer = 0x3d600000 | ((dispatcher>>16) & 0x0000ffff); - buffer += 4; + // lis gr11, bits 32--47 + *(int32_t *)buffer = 0x3d600000 | ((dispatcher>>16) & 0x0000ffff); + buffer += 4; - // ori gr0, gr0, bits 16-31 - *(int32_t *)buffer = 0x60000000 | ((dispatcher>>32) & 0x0000ffff); - buffer += 4; + // ori gr0, gr0, bits 16-31 + *(int32_t *)buffer = 0x60000000 | ((dispatcher>>32) & 0x0000ffff); + buffer += 4; - // ori gr11, gr11, bits 48--63 - *(int32_t *)buffer = 0x616b0000 | (dispatcher & 0x0000ffff); - buffer += 4; + // ori gr11, gr11, bits 48--63 + *(int32_t *)buffer = 0x616b0000 | (dispatcher & 0x0000ffff); + buffer += 4; - // rldimi gr11, gr0, 32, 0 - *(int32_t *)buffer = 0x780b000e; - buffer += 4; + // rldimi gr11, gr0, 32, 0 + *(int32_t *)buffer = 0x780b000e; + buffer += 4; + } + else + { + // pld gr11, [,16], 1 (PC-relative) + *(int32_t *)buffer = 0x04100000; + buffer += 4; + *(int32_t *)buffer = 0xe5600010; + buffer += 4; + } #else - // For POWER4 which has a problem with the CTR/LR cache when the upper - // bits are not 0 extended. Use li/oris when the 16th bit is off - if (customP4) + if (!TR::Compiler->target.cpu.isAtLeast(OMR_PROCESSOR_PPC_P10)) { + // For POWER4 which has a problem with the CTR/LR cache when the upper + // bits are not 0 extended. 
Use li/oris when the 16th bit is off if (!(dispatcher & 0x00008000)) { // li r11, lower @@ -167,16 +230,15 @@ void ppcCreateMethodTrampoline(void *trampPtr, void *startPC, void *method) else { // lis gr11, upper - *(int32_t *)buffer = 0x3d600000 | (((dispatcher>>16) + (dispatcher&(1<<15)?1:0)) & 0x0000ffff); + *(int32_t *)buffer = 0x3d600000 | + (((dispatcher>>16) + (dispatcher&(1<<15)?1:0)) & 0x0000ffff); buffer += 4; // addi gr11, gr11, lower *(int32_t *)buffer = 0x396b0000 | (dispatcher & 0x0000ffff); buffer += 4; - // Now, if highest bit is on we need to clear the sign extend bits on 64bit CPUs - // ** POWER4 pref fix ** - if ((dispatcher & 0x80000000) && (customP4 && TR::comp()->target().cpu.is(OMR_PROCESSOR_PPC_GP))) + if (dispatcher & 0x80000000) { // rlwinm r11,r11,sh=0,mb=0,me=31 *(int32_t *)buffer = 0x556b003e; @@ -186,12 +248,10 @@ void ppcCreateMethodTrampoline(void *trampPtr, void *startPC, void *method) } else { - // lis gr11, upper - *(int32_t *)buffer = 0x3d600000 | (((dispatcher>>16) + (dispatcher&(1<<15)?1:0)) & 0x0000ffff); + // plwz gr11, [,16], 1 (PC-relative) + *(int32_t *)buffer = 0x06100000; buffer += 4; - - // addi gr11, gr11, lower - *(int32_t *)buffer = 0x396b0000 | (dispatcher & 0x0000ffff); + *(int32_t *)buffer = 0x81600010; buffer += 4; } #endif @@ -202,6 +262,12 @@ void ppcCreateMethodTrampoline(void *trampPtr, void *startPC, void *method) // bcctr *(int32_t *)buffer = 0x4e800420; + buffer += 4; + + if (TR::Compiler->target.cpu.isAtLeast(OMR_PROCESSOR_PPC_P10)) + { + *(intptr_t *)buffer = dispatcher; + } #if defined(TR_HOST_POWER) TR::CodeCacheConfig &config = TR::CodeCacheManager::instance()->codeCacheConfig(); @@ -322,33 +388,52 @@ bool ppcCodePatching(void *method, void *callSite, void *currentPC, void *curren } else { - void *newTramp = mcc_replaceTrampoline(reinterpret_cast(method), callSite, currentTramp, currentPC, newPC, true); + // On POWER10 or later, the trampoline can be patched in place atomically. 
No need for a temporary trampoline anymore + + void *newTramp = mcc_replaceTrampoline(reinterpret_cast(method), callSite, currentTramp, currentPC, + newPC, !TR::Compiler->target.cpu.isAtLeast(OMR_PROCESSOR_PPC_P10)); if (newTramp == NULL) { //if (currentTramp == NULL) //FIXME we need an assume for runtime as well - TR_ASSERT(0, "This is an internal error.\n"); return false; } - ppcCreateMethodTrampoline(newTramp, newPC, method); + + // currentTramp==NULL or newTramp is a temporary trampoline + if (newTramp != currentTramp) + ppcCreateMethodTrampoline(newTramp, newPC, method); + if (currentTramp == NULL) { distance = (uint8_t *)newTramp - patchAddr; } else { - if (currentDistance != ((uint8_t *)currentTramp - patchAddr)) + if (currentTramp == newTramp) + { + // this effectively is: we are on POWER10 or later, and we can patch the trampoline in place + + *(uint8_t **)((uint8_t *)currentTramp + 16) = entryAddress; + distance = (uint8_t *)currentTramp - patchAddr; + } + else { - oldBits |= ((uint8_t *)currentTramp - patchAddr) & 0x03fffffc; - *(int32_t *)patchAddr = oldBits; + // this effectively is: we are on pre-POWER10, and we need to take care of temporary trampolines + + if (currentDistance != ((uint8_t *)currentTramp - patchAddr)) + { + oldBits |= ((uint8_t *)currentTramp - patchAddr) & 0x03fffffc; + *(int32_t *)patchAddr = oldBits; #if defined(TR_HOST_POWER) - ppcCodeSync(patchAddr, 4); + ppcCodeSync(patchAddr, 4); #endif - } + } - patchAddr = (uint8_t *)currentTramp; - distance = (uint8_t *)newTramp - patchAddr; - currentDistance = 0; - oldBits = 0x48000000; + patchAddr = (uint8_t *)currentTramp; + distance = (uint8_t *)newTramp - patchAddr; + currentDistance = 0; + oldBits = 0x48000000; + } } } } @@ -459,20 +544,8 @@ bool ppcCodePatching(void *method, void *callSite, void *currentPC, void *curren void ppcCodeCacheParameters(int32_t *trampolineSize, void **callBacks, int32_t *numHelpers, int32_t* CCPreLoadedCodeSize) { - static bool customP4 = 
feGetEnv("TR_CustomP4Trampoline") ? true : false; - -#if defined(TR_TARGET_64BIT) *trampolineSize = TRAMPOLINE_SIZE; -#else - if (customP4) - { - *trampolineSize = TR::comp()->target().cpu.is(OMR_PROCESSOR_PPC_GP) ? TRAMPOLINE_SIZE + 4 : TRAMPOLINE_SIZE; - } - else - { - *trampolineSize = TRAMPOLINE_SIZE + 4; - } -#endif + //TR::CodeCacheConfig &config = TR::CodeCacheManager::instance()->codeCacheConfig(); //fprintf(stderr, "Processor Offset: %d\n", portLibCall_getProcessorType() - TR_FirstPPCProcessor); //fprintf(stderr, "Trampoline Size: %d, %d\n", *trampolineSize, config.trampolineCodeSize);