From 342bcdb170780b49de8abd82490311c8840868af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Thu, 20 Jul 2023 17:15:02 +0900 Subject: [PATCH] Add support for `--instruction-set:native` (#87865) This allows compiling for the ISA extensions that the currently running CPU supports. --- .../nativeaot/Runtime/AsmOffsetsVerify.cpp | 1 + src/coreclr/nativeaot/Runtime/CMakeLists.txt | 7 +- src/coreclr/nativeaot/Runtime/MiscHelpers.cpp | 1 + src/coreclr/nativeaot/Runtime/PalRedhawk.h | 26 - src/coreclr/nativeaot/Runtime/amd64/GC.asm | 36 -- .../nativeaot/Runtime/amd64/GcProbe.asm | 1 - src/coreclr/nativeaot/Runtime/startup.cpp | 202 +----- .../nativeaot/Runtime/unix/PalRedhawkUnix.cpp | 278 --------- .../nativeaot/Runtime/unix/configure.cmake | 3 - .../Runtime/windows/PalRedhawkMinWin.cpp | 102 ---- .../Compiler/HardwareIntrinsicHelpers.cs | 263 ++++++++ .../Common/Compiler/InstructionSetSupport.cs | 9 + .../tools/Common/InstructionSetHelpers.cs | 28 +- .../Compiler/HardwareIntrinsicHelpers.Aot.cs | 163 ----- src/coreclr/tools/aot/ILCompiler/Program.cs | 24 +- src/coreclr/tools/aot/crossgen2/Program.cs | 12 +- .../tools/aot/jitinterface/CMakeLists.txt | 7 +- .../tools/aot/jitinterface/jitwrapper.cpp | 7 + src/native/minipal/configure.cmake | 7 + src/native/minipal/cpufeatures.c | 573 ++++++++++++++++++ .../minipal/cpufeatures.h} | 33 +- src/native/minipal/cpuid.h | 54 ++ src/native/minipal/minipalconfig.h.in | 7 + 23 files changed, 1001 insertions(+), 843 deletions(-) delete mode 100644 src/coreclr/nativeaot/Runtime/amd64/GC.asm create mode 100644 src/native/minipal/configure.cmake create mode 100644 src/native/minipal/cpufeatures.c rename src/{coreclr/nativeaot/Runtime/IntrinsicConstants.h => native/minipal/cpufeatures.h} (84%) create mode 100644 src/native/minipal/cpuid.h create mode 100644 src/native/minipal/minipalconfig.h.in diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp index 705e198aaaca3..2d30ed71701a6 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp +++ b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp @@ -16,6 +16,7 @@ #include "RuntimeInstance.h" #include "CachedInterfaceDispatch.h" #include "shash.h" +#include class AsmOffsets { diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 5249be01592cd..d6e45dc776260 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -49,6 +49,8 @@ set(COMMON_RUNTIME_SOURCES ${GC_DIR}/handletablescan.cpp ${GC_DIR}/objecthandle.cpp ${GC_DIR}/softwarewritewatch.cpp + + ${CLR_SRC_NATIVE_DIR}/minipal/cpufeatures.c ) set(SERVER_GC_SOURCES @@ -115,10 +117,6 @@ if (WIN32) list(APPEND FULL_RUNTIME_SOURCES windows/CoffNativeCodeManager.cpp) set(ASM_SUFFIX asm) - - if (CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_AMD64) - set(RUNTIME_SOURCES_ARCH_ASM ${ARCH_SOURCES_DIR}/GC.${ASM_SUFFIX}) - endif() else() include_directories(unix) @@ -249,6 +247,7 @@ else() endif() add_definitions(-DNO_UI_ASSERT) include(unix/configure.cmake) + include(${CLR_SRC_NATIVE_DIR}/minipal/configure.cmake) include_directories(${CMAKE_CURRENT_BINARY_DIR}) endif() diff --git a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp index da9a5c4b97618..ec2fabcc540f1 100644 --- a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp @@ -39,6 +39,7 @@ #include "GCMemoryHelpers.inl" 
#include "yieldprocessornormalized.h" #include "RhConfig.h" +#include COOP_PINVOKE_HELPER(void, RhDebugBreak, ()) { diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h index 88785b4ec1412..6c363f80631e2 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawk.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawk.h @@ -20,7 +20,6 @@ #include "CommonTypes.h" #include "CommonMacros.h" #include "gcenv.structs.h" // CRITICAL_SECTION -#include "IntrinsicConstants.h" #include "PalRedhawkCommon.h" #ifndef PAL_REDHAWK_INCLUDED @@ -776,31 +775,6 @@ REDHAWK_PALIMPORT char* PalCopyTCharAsChar(const TCHAR* toCopy); REDHAWK_PALIMPORT int32_t __cdecl _stricmp(const char *string1, const char *string2); #endif // TARGET_UNIX -#if defined(HOST_X86) || defined(HOST_AMD64) - -#ifdef TARGET_UNIX -// MSVC directly defines intrinsics for __cpuid and __cpuidex matching the below signatures -// We define matching signatures for use on Unix platforms. -// -// IMPORTANT: Unlike MSVC, Unix does not explicitly zero ECX for __cpuid - -REDHAWK_PALIMPORT void __cpuid(int cpuInfo[4], int function_id); -REDHAWK_PALIMPORT void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id); -#else -#include -#endif - -REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport(); -REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI avx512StateSupport(); -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvxEnabled(); -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvx512Enabled(); - -#endif // defined(HOST_X86) || defined(HOST_AMD64) - -#if defined(HOST_ARM64) -REDHAWK_PALIMPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags); -#endif //defined(HOST_ARM64) - #include "PalRedhawkInline.h" #endif // !PAL_REDHAWK_INCLUDED diff --git a/src/coreclr/nativeaot/Runtime/amd64/GC.asm b/src/coreclr/nativeaot/Runtime/amd64/GC.asm deleted file mode 100644 index 7c90f90538476..0000000000000 --- a/src/coreclr/nativeaot/Runtime/amd64/GC.asm +++ /dev/null @@ -1,36 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. - -include AsmMacros.inc - -;; extern "C" DWORD __stdcall xmmYmmStateSupport(); -LEAF_ENTRY xmmYmmStateSupport, _TEXT - mov ecx, 0 ; Specify xcr0 - xgetbv ; result in EDX:EAX - and eax, 06H - cmp eax, 06H ; check OS has enabled both XMM and YMM state support - jne not_supported - mov eax, 1 - jmp done - not_supported: - mov eax, 0 - done: - ret -LEAF_END xmmYmmStateSupport, _TEXT - -;; extern "C" DWORD __stdcall avx512StateSupport(); -LEAF_ENTRY avx512StateSupport, _TEXT - mov ecx, 0 ; Specify xcr0 - xgetbv ; result in EDX:EAX - and eax, 0E6H - cmp eax, 0E6H ; check OS has enabled XMM, YMM and ZMM state support - jne not_supported - mov eax, 1 - jmp done - not_supported: - mov eax, 0 - done: - ret -LEAF_END avx512StateSupport, _TEXT - - end diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm index c01ada624f190..fd5c882b6bd60 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm @@ -288,7 +288,6 @@ RuntimeInstance__ShouldHijackLoopForGcStress equ ?ShouldHijackLoopForGcStress@Ru EXTERN RuntimeInstance__ShouldHijackLoopForGcStress : PROC EXTERN g_fGcStressStarted : DWORD -EXTERN g_fHasFastFxsave : BYTE ;; ;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this. 
diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index d388030dfe79a..b7e59f6bc326e 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -25,6 +25,7 @@ #include "stressLog.h" #include "RestrictedCallouts.h" #include "yieldprocessornormalized.h" +#include #ifdef FEATURE_PERFTRACING #include "EventPipeInterface.h" @@ -48,9 +49,6 @@ static bool DetectCPUFeatures(); extern RhConfig * g_pRhConfig; -EXTERN_C bool g_fHasFastFxsave; -bool g_fHasFastFxsave = false; - CrstStatic g_ThunkPoolLock; #if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) @@ -183,203 +181,7 @@ static bool InitDLL(HANDLE hPalInstance) bool DetectCPUFeatures() { #if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) - -#if defined(HOST_X86) || defined(HOST_AMD64) - - int cpuidInfo[4]; - - const int CPUID_EAX = 0; - const int CPUID_EBX = 1; - const int CPUID_ECX = 2; - const int CPUID_EDX = 3; - - __cpuid(cpuidInfo, 0x00000000); - uint32_t maxCpuId = static_cast(cpuidInfo[CPUID_EAX]); - - if (maxCpuId >= 1) - { - __cpuid(cpuidInfo, 0x00000001); - - const int requiredBaselineEdxFlags = (1 << 25) // SSE - | (1 << 26); // SSE2 - - if ((cpuidInfo[CPUID_EDX] & requiredBaselineEdxFlags) == requiredBaselineEdxFlags) - { - g_cpuFeatures |= XArchIntrinsicConstants_VectorT128; - - if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI - { - g_cpuFeatures |= XArchIntrinsicConstants_Aes; - } - - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ - { - g_cpuFeatures |= XArchIntrinsicConstants_Pclmulqdq; - } - - if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3 - { - g_cpuFeatures |= XArchIntrinsicConstants_Sse3; - - if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3 - { - g_cpuFeatures |= XArchIntrinsicConstants_Ssse3; - - if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) // SSE4.1 - { - g_cpuFeatures |= XArchIntrinsicConstants_Sse41; - - if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) // SSE4.2 - { - g_cpuFeatures |= XArchIntrinsicConstants_Sse42; - - if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE - { - g_cpuFeatures |= XArchIntrinsicConstants_Movbe; - } - - if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT - { - g_cpuFeatures |= XArchIntrinsicConstants_Popcnt; - } - - const int requiredAvxEcxFlags = (1 << 27) // OSXSAVE - | (1 << 28); // AVX - - if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags) - { - if (PalIsAvxEnabled() && (xmmYmmStateSupport() == 1)) // XGETBV == 11 - { - g_cpuFeatures |= XArchIntrinsicConstants_Avx; - - if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA - { - g_cpuFeatures |= XArchIntrinsicConstants_Fma; - } - - if (maxCpuId >= 0x07) - { - __cpuidex(cpuidInfo, 0x00000007, 0x00000000); - - if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 - { - g_cpuFeatures |= XArchIntrinsicConstants_Avx2; - g_cpuFeatures |= XArchIntrinsicConstants_VectorT256; - - if (PalIsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 - { - if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F - { - g_cpuFeatures |= XArchIntrinsicConstants_Avx512f; - g_cpuFeatures |= XArchIntrinsicConstants_VectorT512; - - bool isAVX512_VLSupported = false; - if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL - { - g_cpuFeatures |= XArchIntrinsicConstants_Avx512f_vl; - isAVX512_VLSupported = true; - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW - { - g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw; - if (isAVX512_VLSupported) // AVX512BW_VL - { 
- g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw_vl; - } - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD - { - g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd; - if (isAVX512_VLSupported) // AVX512CD_VL - { - g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd_vl; - } - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ - { - g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq; - if (isAVX512_VLSupported) // AVX512DQ_VL - { - g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq_vl; - } - } - - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI - { - g_cpuFeatures |= XArchIntrinsicConstants_Avx512Vbmi; - if (isAVX512_VLSupported) // AVX512VBMI_VL - { - g_cpuFeatures |= XArchIntrinsicConstants_Avx512Vbmi_vl; - } - } - } - } - - __cpuidex(cpuidInfo, 0x00000007, 0x00000001); - - if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI - { - g_cpuFeatures |= XArchIntrinsicConstants_AvxVnni; - } - } - } - } - } - } - } - } - } - } - - if (maxCpuId >= 0x07) - { - __cpuidex(cpuidInfo, 0x00000007, 0x00000000); - - if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1 - { - g_cpuFeatures |= XArchIntrinsicConstants_Bmi1; - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2 - { - g_cpuFeatures |= XArchIntrinsicConstants_Bmi2; - } - - if ((cpuidInfo[CPUID_EDX] & (1 << 14)) != 0) - { - g_cpuFeatures |= XArchIntrinsicConstants_Serialize; // SERIALIZE - } - } - } - - __cpuid(cpuidInfo, 0x80000000); - uint32_t maxCpuIdEx = static_cast(cpuidInfo[CPUID_EAX]); - - if (maxCpuIdEx >= 0x80000001) - { - __cpuid(cpuidInfo, 0x80000001); - - if ((cpuidInfo[CPUID_ECX] & (1 << 5)) != 0) // LZCNT - { - g_cpuFeatures |= XArchIntrinsicConstants_Lzcnt; - } - -#ifdef HOST_AMD64 - // AMD has a "fast" mode for fxsave/fxrstor, which omits the saving of xmm registers. The OS will enable this mode - // if it is supported. So if we continue to use fxsave/fxrstor, we must manually save/restore the xmm registers. - // fxsr_opt is bit 25 of CPUID_EDX - if ((cpuidInfo[CPUID_EDX] & (1 << 25)) != 0) - g_fHasFastFxsave = true; -#endif - } -#endif // HOST_X86 || HOST_AMD64 - -#if defined(HOST_ARM64) - PAL_GetCpuCapabilityFlags (&g_cpuFeatures); -#endif + g_cpuFeatures = minipal_getcpufeatures(); if ((g_cpuFeatures & g_requiredCpuFeatures) != g_requiredCpuFeatures) { diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp index 08d09f25e0c61..fba1cb0e8c8af 100644 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp @@ -739,16 +739,6 @@ REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* return moduleHandle; } -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvxEnabled() -{ - return true; -} - -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvx512Enabled() -{ - return true; -} - REDHAWK_PALEXPORT void PalPrintFatalError(const char* message) { // Write the message using lowest-level OS API available. This is used to print the stack overflow @@ -1294,271 +1284,3 @@ extern "C" uint64_t PalGetCurrentOSThreadId() #endif } -#if defined(HOST_X86) || defined(HOST_AMD64) -// MSVC directly defines intrinsics for __cpuid and __cpuidex matching the below signatures -// We define matching signatures for use on Unix platforms. 
-// -// IMPORTANT: Unlike MSVC, Unix does not explicitly zero ECX for __cpuid - -#if !__has_builtin(__cpuid) -REDHAWK_PALEXPORT void __cpuid(int cpuInfo[4], int function_id) -{ - // Based on the Clang implementation provided in cpuid.h: - // https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/cpuid.h - - __asm(" cpuid\n" \ - : "=a"(cpuInfo[0]), "=b"(cpuInfo[1]), "=c"(cpuInfo[2]), "=d"(cpuInfo[3]) \ - : "0"(function_id) - ); -} -#endif - -#if !__has_builtin(__cpuidex) -REDHAWK_PALEXPORT void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id) -{ - // Based on the Clang implementation provided in cpuid.h: - // https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/cpuid.h - - __asm(" cpuid\n" \ - : "=a"(cpuInfo[0]), "=b"(cpuInfo[1]), "=c"(cpuInfo[2]), "=d"(cpuInfo[3]) \ - : "0"(function_id), "2"(subFunction_id) - ); -} -#endif - -REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport() -{ - DWORD eax; - __asm(" xgetbv\n" \ - : "=a"(eax) /*output in eax*/\ - : "c"(0) /*inputs - 0 in ecx*/\ - : "edx" /* registers that are clobbered*/ - ); - // check OS has enabled both XMM and YMM state support - return ((eax & 0x06) == 0x06) ? 1 : 0; -} - -#ifndef XSTATE_MASK_AVX512 -#define XSTATE_MASK_AVX512 (0xE0) /* 0b1110_0000 */ -#endif // XSTATE_MASK_AVX512 - -REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI avx512StateSupport() -{ -#if defined(TARGET_APPLE) - // MacOS has specialized behavior where it reports AVX512 support but doesnt - // actually enable AVX512 until the first instruction is executed and does so - // on a per thread basis. It does this by catching the faulting instruction and - // checking for the EVEX encoding. The kmov instructions, despite being part - // of the AVX512 instruction set are VEX encoded and dont trigger the enablement - // - // See https://github.com/apple/darwin-xnu/blob/main/osfmk/i386/fpu.c#L174 - - // TODO-AVX512: Enabling this for OSX requires ensuring threads explicitly trigger - // the AVX-512 enablement so that arbitrary usage doesn't cause downstream problems - - return false; -#else - DWORD eax; - __asm(" xgetbv\n" \ - : "=a"(eax) /*output in eax*/\ - : "c"(0) /*inputs - 0 in ecx*/\ - : "edx" /* registers that are clobbered*/ - ); - // check OS has enabled XMM, YMM and ZMM state support - return ((eax & 0xE6) == 0x0E6) ? 1 : 0; -#endif -} - -#endif // defined(HOST_X86) || defined(HOST_AMD64) - -#if defined (HOST_ARM64) - -#if HAVE_AUXV_HWCAP_H -#include -#include -#endif - -#if HAVE_SYSCTLBYNAME -#include -#endif - -// Based on PAL_GetJitCpuCapabilityFlags from CoreCLR (jitsupport.cpp) -REDHAWK_PALEXPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags) -{ - *flags = 0; - -#if HAVE_AUXV_HWCAP_H - unsigned long hwCap = getauxval(AT_HWCAP); - - // HWCAP_* flags are introduced by ARM into the Linux kernel as new extensions are published. - // For a given kernel, some of these flags may not be present yet. - // Use ifdef for each to allow for compilation with any vintage kernel. - // From a single binary distribution perspective, compiling with latest kernel asm/hwcap.h should - // include all published flags. Given flags are merged to kernel and published before silicon is - // available, using the latest kernel for release should be sufficient. 
- -#ifdef HWCAP_AES - if (hwCap & HWCAP_AES) - *flags |= ARM64IntrinsicConstants_Aes; -#endif -#ifdef HWCAP_ATOMICS - if (hwCap & HWCAP_ATOMICS) - *flags |= ARM64IntrinsicConstants_Atomics; -#endif -#ifdef HWCAP_CRC32 - if (hwCap & HWCAP_CRC32) - *flags |= ARM64IntrinsicConstants_Crc32; -#endif -#ifdef HWCAP_DCPOP -// if (hwCap & HWCAP_DCPOP) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP_ASIMDDP - if (hwCap & HWCAP_ASIMDDP) - *flags |= ARM64IntrinsicConstants_Dp; -#endif -#ifdef HWCAP_FCMA -// if (hwCap & HWCAP_FCMA) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP_FP -// if (hwCap & HWCAP_FP) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP_FPHP -// if (hwCap & HWCAP_FPHP) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP_JSCVT -// if (hwCap & HWCAP_JSCVT) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP_LRCPC - if (hwCap & HWCAP_LRCPC) - *flags |= ARM64IntrinsicConstants_Rcpc; -#endif -#ifdef HWCAP_PMULL -// if (hwCap & HWCAP_PMULL) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP_SHA1 - if (hwCap & HWCAP_SHA1) - *flags |= ARM64IntrinsicConstants_Sha1; -#endif -#ifdef HWCAP_SHA2 - if (hwCap & HWCAP_SHA2) - *flags |= ARM64IntrinsicConstants_Sha256; -#endif -#ifdef HWCAP_SHA512 -// if (hwCap & HWCAP_SHA512) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP_SHA3 -// if (hwCap & HWCAP_SHA3) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP_ASIMD - if (hwCap & HWCAP_ASIMD) - *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; -#endif -#ifdef HWCAP_ASIMDRDM - if (hwCap & HWCAP_ASIMDRDM) - *flags |= ARM64IntrinsicConstants_Rdm; -#endif -#ifdef HWCAP_ASIMDHP -// if (hwCap & HWCAP_ASIMDHP) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP_SM3 -// if (hwCap & HWCAP_SM3) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP_SM4 -// if (hwCap & HWCAP_SM4) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP_SVE -// if (hwCap & HWCAP_SVE) -// *flags |= ARM64IntrinsicConstants_???; -#endif - -#ifdef AT_HWCAP2 - unsigned long hwCap2 = getauxval(AT_HWCAP2); - -#ifdef HWCAP2_DCPODP -// if (hwCap2 & HWCAP2_DCPODP) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP2_SVE2 -// if (hwCap2 & HWCAP2_SVE2) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP2_SVEAES -// if (hwCap2 & HWCAP2_SVEAES) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP2_SVEPMULL -// if (hwCap2 & HWCAP2_SVEPMULL) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP2_SVEBITPERM -// if (hwCap2 & HWCAP2_SVEBITPERM) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP2_SVESHA3 -// if (hwCap2 & HWCAP2_SVESHA3) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP2_SVESM4 -// if (hwCap2 & HWCAP2_SVESM4) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP2_FLAGM2 -// if (hwCap2 & HWCAP2_FLAGM2) -// *flags |= ARM64IntrinsicConstants_???; -#endif -#ifdef HWCAP2_FRINT -// if (hwCap2 & HWCAP2_FRINT) -// *flags |= ARM64IntrinsicConstants_???; -#endif - -#endif // AT_HWCAP2 - -#else // !HAVE_AUXV_HWCAP_H - -#if HAVE_SYSCTLBYNAME - int64_t valueFromSysctl = 0; - size_t sz = sizeof(valueFromSysctl); - - if ((sysctlbyname("hw.optional.arm.FEAT_AES", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - *flags |= ARM64IntrinsicConstants_Aes; - - if ((sysctlbyname("hw.optional.armv8_crc32", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - 
*flags |= ARM64IntrinsicConstants_Crc32; - - if ((sysctlbyname("hw.optional.arm.FEAT_DotProd", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - *flags |= ARM64IntrinsicConstants_Dp; - - if ((sysctlbyname("hw.optional.arm.FEAT_RDM", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - *flags |= ARM64IntrinsicConstants_Rdm; - - if ((sysctlbyname("hw.optional.arm.FEAT_SHA1", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - *flags |= ARM64IntrinsicConstants_Sha1; - - if ((sysctlbyname("hw.optional.arm.FEAT_SHA256", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - *flags |= ARM64IntrinsicConstants_Sha256; - - if ((sysctlbyname("hw.optional.armv8_1_atomics", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - *flags |= ARM64IntrinsicConstants_Atomics; - - if ((sysctlbyname("hw.optional.arm.FEAT_LRCPC", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - *flags |= ARM64IntrinsicConstants_Rcpc; -#endif // HAVE_SYSCTLBYNAME - - // Every ARM64 CPU should support SIMD and FP - // If the OS have no function to query for CPU capabilities we set just these - - *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; -#endif // HAVE_AUXV_HWCAP_H -} -#endif diff --git a/src/coreclr/nativeaot/Runtime/unix/configure.cmake b/src/coreclr/nativeaot/Runtime/unix/configure.cmake index d93657613a6cc..798f6a178ee49 100644 --- a/src/coreclr/nativeaot/Runtime/unix/configure.cmake +++ b/src/coreclr/nativeaot/Runtime/unix/configure.cmake @@ -15,8 +15,6 @@ endif() list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_FILE_OFFSET_BITS=64) -check_include_files("sys/auxv.h;asm/hwcap.h" HAVE_AUXV_HWCAP_H) - check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD) check_library_exists(c pthread_create "" HAVE_PTHREAD_IN_LIBC) @@ -32,7 +30,6 @@ check_library_exists(${PTHREAD_LIBRARY} pthread_condattr_setclock "" HAVE_PTHREA check_library_exists(${PTHREAD_LIBRARY} pthread_getthreadid_np "" HAVE_PTHREAD_GETTHREADID_NP) check_function_exists(clock_nanosleep HAVE_CLOCK_NANOSLEEP) -check_function_exists(sysctlbyname HAVE_SYSCTLBYNAME) check_struct_has_member ("ucontext_t" uc_mcontext.gregs[0] ucontext.h HAVE_GREGSET_T) check_struct_has_member ("ucontext_t" uc_mcontext.__gregs[0] ucontext.h HAVE___GREGSET_T) diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp index 902483765b20d..964a3b52b09e4 100644 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp @@ -667,56 +667,6 @@ REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* return (HANDLE)module; } -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvxEnabled() -{ - typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)(); - PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; - - HMODULE hMod = LoadKernel32dll(); - if (hMod == NULL) - return FALSE; - - pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures"); - - if (pfnGetEnabledXStateFeatures == NULL) - { - return FALSE; - } - - DWORD64 FeatureMask = pfnGetEnabledXStateFeatures(); - if ((FeatureMask & XSTATE_MASK_AVX) == 0) - { - return FALSE; - } - - return TRUE; -} - -REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalIsAvx512Enabled() -{ - typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)(); - PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; - - HMODULE hMod = 
LoadKernel32dll(); - if (hMod == NULL) - return FALSE; - - pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures"); - - if (pfnGetEnabledXStateFeatures == NULL) - { - return FALSE; - } - - DWORD64 FeatureMask = pfnGetEnabledXStateFeatures(); - if ((FeatureMask & XSTATE_MASK_AVX512) == 0) - { - return FALSE; - } - - return TRUE; -} - REDHAWK_PALEXPORT void* REDHAWK_PALAPI PalAddVectoredExceptionHandler(uint32_t firstHandler, _In_ PVECTORED_EXCEPTION_HANDLER vectoredHandler) { return AddVectoredExceptionHandler(firstHandler, vectoredHandler); @@ -770,55 +720,3 @@ REDHAWK_PALEXPORT void PalFlushInstructionCache(_In_ void* pAddress, size_t size FlushInstructionCache(GetCurrentProcess(), pAddress, size); } -#if defined(HOST_ARM64) - -#include "IntrinsicConstants.h" - -REDHAWK_PALIMPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags) -{ - *flags = 0; - -// Older version of SDK would return false for these intrinsics -// but make sure we pass the right values to the APIs -#ifndef PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE -#define PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE 34 -#endif -#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE -#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43 -#endif -#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE -#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45 -#endif - - // FP and SIMD support are enabled by default - *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; - - if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) - { - *flags |= ARM64IntrinsicConstants_Aes; - *flags |= ARM64IntrinsicConstants_Sha1; - *flags |= ARM64IntrinsicConstants_Sha256; - } - - if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) - { - *flags |= ARM64IntrinsicConstants_Crc32; - } - - if (IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) - { - *flags |= ARM64IntrinsicConstants_Atomics; - } - - if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) - { - *flags |= ARM64IntrinsicConstants_Dp; - } - - if (IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE)) - { - *flags |= ARM64IntrinsicConstants_Rcpc; - } -} - -#endif diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index a523f1c5f0979..28c19ee96de77 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -1,6 +1,10 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System; +using System.Diagnostics; + +using Internal.JitInterface; using Internal.TypeSystem; namespace ILCompiler @@ -14,5 +18,264 @@ public static bool IsHardwareIntrinsic(MethodDesc method) { return !string.IsNullOrEmpty(InstructionSetSupport.GetHardwareIntrinsicId(method.Context.Target.Architecture, method.OwningType)); } + + public static void AddRuntimeRequiredIsaFlagsToBuilder(InstructionSetSupportBuilder builder, int flags) + { + switch (builder.Architecture) + { + case TargetArchitecture.X86: + case TargetArchitecture.X64: + XArchIntrinsicConstants.AddToBuilder(builder, flags); + break; + case TargetArchitecture.ARM64: + Arm64IntrinsicConstants.AddToBuilder(builder, flags); + break; + default: + Debug.Fail("Probably unimplemented"); + break; + } + } + + // Keep these enumerations in sync with cpufeatures.h in the minipal. 
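On the native side, these same bit values are what `minipal_getcpufeatures()` returns, and the `XArchIntrinsicConstants` class that follows mirrors them one-to-one. A minimal consumer of the native API might look like the sketch below; the `<minipal/cpufeatures.h>` include path is an assumption based on where the patch places the header, and an x64 build is assumed so the x86/x64 constants are defined:

```c
// Hedged sketch: consume the CPU-feature bitmask from the new minipal probe.
// Assumes the header is reachable as <minipal/cpufeatures.h> and an x64 build.
#include <stdio.h>
#include <minipal/cpufeatures.h>

int main(void)
{
    int features = minipal_getcpufeatures();

    if (features & XArchIntrinsicConstants_Avx2)
        printf("AVX2 is usable on this machine\n");

    if (features & XArchIntrinsicConstants_Avx512f)
        printf("AVX-512F is usable on this machine\n");

    return 0;
}
```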
+ private static class XArchIntrinsicConstants + { + // SSE and SSE2 are baseline ISAs - they're always available + public const int Aes = 0x0001; + public const int Pclmulqdq = 0x0002; + public const int Sse3 = 0x0004; + public const int Ssse3 = 0x0008; + public const int Sse41 = 0x0010; + public const int Sse42 = 0x0020; + public const int Popcnt = 0x0040; + public const int Avx = 0x0080; + public const int Fma = 0x0100; + public const int Avx2 = 0x0200; + public const int Bmi1 = 0x0400; + public const int Bmi2 = 0x0800; + public const int Lzcnt = 0x1000; + public const int AvxVnni = 0x2000; + public const int Movbe = 0x4000; + public const int Avx512f = 0x8000; + public const int Avx512f_vl = 0x10000; + public const int Avx512bw = 0x20000; + public const int Avx512bw_vl = 0x40000; + public const int Avx512cd = 0x80000; + public const int Avx512cd_vl = 0x100000; + public const int Avx512dq = 0x200000; + public const int Avx512dq_vl = 0x400000; + public const int Avx512Vbmi = 0x800000; + public const int Avx512Vbmi_vl = 0x1000000; + public const int Serialize = 0x2000000; + public const int VectorT128 = 0x4000000; + public const int VectorT256 = 0x8000000; + public const int VectorT512 = 0x10000000; + + public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) + { + if ((flags & Aes) != 0) + builder.AddSupportedInstructionSet("aes"); + if ((flags & Pclmulqdq) != 0) + builder.AddSupportedInstructionSet("pclmul"); + if ((flags & Sse3) != 0) + builder.AddSupportedInstructionSet("sse3"); + if ((flags & Ssse3) != 0) + builder.AddSupportedInstructionSet("ssse3"); + if ((flags & Sse41) != 0) + builder.AddSupportedInstructionSet("sse4.1"); + if ((flags & Sse42) != 0) + builder.AddSupportedInstructionSet("sse4.2"); + if ((flags & Popcnt) != 0) + builder.AddSupportedInstructionSet("popcnt"); + if ((flags & Avx) != 0) + builder.AddSupportedInstructionSet("avx"); + if ((flags & Fma) != 0) + builder.AddSupportedInstructionSet("fma"); + if ((flags & Avx2) != 0) + builder.AddSupportedInstructionSet("avx2"); + if ((flags & Bmi1) != 0) + builder.AddSupportedInstructionSet("bmi"); + if ((flags & Bmi2) != 0) + builder.AddSupportedInstructionSet("bmi2"); + if ((flags & Lzcnt) != 0) + builder.AddSupportedInstructionSet("lzcnt"); + if ((flags & AvxVnni) != 0) + builder.AddSupportedInstructionSet("avxvnni"); + if ((flags & Movbe) != 0) + builder.AddSupportedInstructionSet("movbe"); + if ((flags & Avx512f) != 0) + builder.AddSupportedInstructionSet("avx512f"); + if ((flags & Avx512f_vl) != 0) + builder.AddSupportedInstructionSet("avx512f_vl"); + if ((flags & Avx512bw) != 0) + builder.AddSupportedInstructionSet("avx512bw"); + if ((flags & Avx512bw_vl) != 0) + builder.AddSupportedInstructionSet("avx512bw_vl"); + if ((flags & Avx512cd) != 0) + builder.AddSupportedInstructionSet("avx512cd"); + if ((flags & Avx512cd_vl) != 0) + builder.AddSupportedInstructionSet("avx512cd_vl"); + if ((flags & Avx512dq) != 0) + builder.AddSupportedInstructionSet("avx512dq"); + if ((flags & Avx512dq_vl) != 0) + builder.AddSupportedInstructionSet("avx512dq_vl"); + if ((flags & Avx512Vbmi) != 0) + builder.AddSupportedInstructionSet("avx512vbmi"); + if ((flags & Avx512Vbmi_vl) != 0) + builder.AddSupportedInstructionSet("avx512vbmi_vl"); + if ((flags & Serialize) != 0) + builder.AddSupportedInstructionSet("serialize"); + } + + public static int FromInstructionSet(InstructionSet instructionSet) + { + Debug.Assert(InstructionSet.X64_AES == InstructionSet.X86_AES); + Debug.Assert(InstructionSet.X64_SSE41 == 
InstructionSet.X86_SSE41); + Debug.Assert(InstructionSet.X64_LZCNT == InstructionSet.X86_LZCNT); + + return instructionSet switch + { + // Optional ISAs - only available via opt-in or opportunistic light-up + InstructionSet.X64_AES => Aes, + InstructionSet.X64_AES_X64 => Aes, + InstructionSet.X64_PCLMULQDQ => Pclmulqdq, + InstructionSet.X64_PCLMULQDQ_X64 => Pclmulqdq, + InstructionSet.X64_SSE3 => Sse3, + InstructionSet.X64_SSE3_X64 => Sse3, + InstructionSet.X64_SSSE3 => Ssse3, + InstructionSet.X64_SSSE3_X64 => Ssse3, + InstructionSet.X64_SSE41 => Sse41, + InstructionSet.X64_SSE41_X64 => Sse41, + InstructionSet.X64_SSE42 => Sse42, + InstructionSet.X64_SSE42_X64 => Sse42, + InstructionSet.X64_POPCNT => Popcnt, + InstructionSet.X64_POPCNT_X64 => Popcnt, + InstructionSet.X64_AVX => Avx, + InstructionSet.X64_AVX_X64 => Avx, + InstructionSet.X64_FMA => Fma, + InstructionSet.X64_FMA_X64 => Fma, + InstructionSet.X64_AVX2 => Avx2, + InstructionSet.X64_AVX2_X64 => Avx2, + InstructionSet.X64_BMI1 => Bmi1, + InstructionSet.X64_BMI1_X64 => Bmi1, + InstructionSet.X64_BMI2 => Bmi2, + InstructionSet.X64_BMI2_X64 => Bmi2, + InstructionSet.X64_LZCNT => Lzcnt, + InstructionSet.X64_LZCNT_X64 => Lzcnt, + InstructionSet.X64_AVXVNNI => AvxVnni, + InstructionSet.X64_AVXVNNI_X64 => AvxVnni, + InstructionSet.X64_MOVBE => Movbe, + InstructionSet.X64_MOVBE_X64 => Movbe, + InstructionSet.X64_AVX512F => Avx512f, + InstructionSet.X64_AVX512F_X64 => Avx512f, + InstructionSet.X64_AVX512F_VL => Avx512f_vl, + InstructionSet.X64_AVX512F_VL_X64 => Avx512f_vl, + InstructionSet.X64_AVX512BW => Avx512bw, + InstructionSet.X64_AVX512BW_X64 => Avx512bw, + InstructionSet.X64_AVX512BW_VL => Avx512bw_vl, + InstructionSet.X64_AVX512BW_VL_X64 => Avx512bw_vl, + InstructionSet.X64_AVX512CD => Avx512cd, + InstructionSet.X64_AVX512CD_X64 => Avx512cd, + InstructionSet.X64_AVX512CD_VL => Avx512cd_vl, + InstructionSet.X64_AVX512CD_VL_X64 => Avx512cd_vl, + InstructionSet.X64_AVX512DQ => Avx512dq, + InstructionSet.X64_AVX512DQ_X64 => Avx512dq, + InstructionSet.X64_AVX512DQ_VL => Avx512dq_vl, + InstructionSet.X64_AVX512DQ_VL_X64 => Avx512dq_vl, + InstructionSet.X64_AVX512VBMI => Avx512Vbmi, + InstructionSet.X64_AVX512VBMI_X64 => Avx512Vbmi, + InstructionSet.X64_AVX512VBMI_VL => Avx512Vbmi_vl, + InstructionSet.X64_AVX512VBMI_VL_X64 => Avx512Vbmi_vl, + InstructionSet.X64_X86Serialize => Serialize, + InstructionSet.X64_X86Serialize_X64 => Serialize, + + // Baseline ISAs - they're always available + InstructionSet.X64_SSE => 0, + InstructionSet.X64_SSE_X64 => 0, + InstructionSet.X64_SSE2 => 0, + InstructionSet.X64_SSE2_X64 => 0, + + InstructionSet.X64_X86Base => 0, + InstructionSet.X64_X86Base_X64 => 0, + + // Vector Sizes + InstructionSet.X64_VectorT128 => VectorT128, + InstructionSet.X64_VectorT256 => VectorT256, + InstructionSet.X64_VectorT512 => VectorT512, + + _ => throw new NotSupportedException(((InstructionSet_X64)instructionSet).ToString()) + }; + } + } + + // Keep these enumerations in sync with cpufeatures.h in the minipal. 
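(The `Arm64IntrinsicConstants` class below mirrors its native counterpart the same way the x64 class above does.) For context, both sets of bits feed the startup check that this patch simplifies in `startup.cpp`: ILC records the ISA flags the generated image requires, and the runtime refuses to start if the current CPU does not report all of them. A simplified sketch of that check, with names borrowed from the runtime:

```c
// Illustrative sketch of the runtime-side verification in startup.cpp; the real
// DetectCPUFeatures also handles ARM64 and reports a fatal error on mismatch.
#include <stdbool.h>
#include <minipal/cpufeatures.h>   // assumed include path for the new header

static int g_cpuFeatures;
extern int g_requiredCpuFeatures;  // baked into the AOT image by the compiler

static bool DetectCPUFeatures(void)
{
    g_cpuFeatures = minipal_getcpufeatures();

    // Every ISA the image was compiled to rely on must be present on this CPU.
    return (g_cpuFeatures & g_requiredCpuFeatures) == g_requiredCpuFeatures;
}
```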
+ private static class Arm64IntrinsicConstants + { + public const int AdvSimd = 0x0001; + public const int Aes = 0x0002; + public const int Crc32 = 0x0004; + public const int Dp = 0x0008; + public const int Rdm = 0x0010; + public const int Sha1 = 0x0020; + public const int Sha256 = 0x0040; + public const int Atomics = 0x0080; + public const int Rcpc = 0x0100; + public const int VectorT128 = 0x0200; + + public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) + { + if ((flags & AdvSimd) != 0) + builder.AddSupportedInstructionSet("neon"); + if ((flags & Aes) != 0) + builder.AddSupportedInstructionSet("aes"); + if ((flags & Crc32) != 0) + builder.AddSupportedInstructionSet("crc"); + if ((flags & Dp) != 0) + builder.AddSupportedInstructionSet("dotprod"); + if ((flags & Rdm) != 0) + builder.AddSupportedInstructionSet("rdma"); + if ((flags & Sha1) != 0) + builder.AddSupportedInstructionSet("sha1"); + if ((flags & Sha256) != 0) + builder.AddSupportedInstructionSet("sha2"); + if ((flags & Atomics) != 0) + builder.AddSupportedInstructionSet("lse"); + if ((flags & Rcpc) != 0) + builder.AddSupportedInstructionSet("rcpc"); + } + + public static int FromInstructionSet(InstructionSet instructionSet) + { + return instructionSet switch + { + + // Baseline ISAs - they're always available + InstructionSet.ARM64_ArmBase => 0, + InstructionSet.ARM64_ArmBase_Arm64 => 0, + InstructionSet.ARM64_AdvSimd => AdvSimd, + InstructionSet.ARM64_AdvSimd_Arm64 => AdvSimd, + + // Optional ISAs - only available via opt-in or opportunistic light-up + InstructionSet.ARM64_Aes => Aes, + InstructionSet.ARM64_Aes_Arm64 => Aes, + InstructionSet.ARM64_Crc32 => Crc32, + InstructionSet.ARM64_Crc32_Arm64 => Crc32, + InstructionSet.ARM64_Dp => Dp, + InstructionSet.ARM64_Dp_Arm64 => Dp, + InstructionSet.ARM64_Rdm => Rdm, + InstructionSet.ARM64_Rdm_Arm64 => Rdm, + InstructionSet.ARM64_Sha1 => Sha1, + InstructionSet.ARM64_Sha1_Arm64 => Sha1, + InstructionSet.ARM64_Sha256 => Sha256, + InstructionSet.ARM64_Sha256_Arm64 => Sha256, + InstructionSet.ARM64_Atomics => Atomics, + InstructionSet.ARM64_Rcpc => Rcpc, + + // Vector Sizes + InstructionSet.ARM64_VectorT128 => VectorT128, + + _ => throw new NotSupportedException(((InstructionSet_ARM64)instructionSet).ToString()) + }; + } + } } } diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index f2805e09ab230..d66eee9833a66 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -211,6 +211,8 @@ public static InstructionSetFlags GetNonSpecifiableInstructionSetsForArch(Target private readonly SortedSet _unsupportedInstructionSets; private readonly TargetArchitecture _architecture; + public TargetArchitecture Architecture => _architecture; + public InstructionSetSupportBuilder(TargetArchitecture architecture) { _supportedInstructionSets = new SortedSet(); @@ -225,6 +227,13 @@ public InstructionSetSupportBuilder(InstructionSetSupportBuilder other) _architecture = other._architecture; } + public override string ToString() + => (_supportedInstructionSets.Count > 0 ? "+" : "") + + string.Join(",+", _supportedInstructionSets) + + (_supportedInstructionSets.Count > 0 && _unsupportedInstructionSets.Count > 0 ? "," : "") + + (_unsupportedInstructionSets.Count > 0 ? "-" : "") + + string.Join(",-", _unsupportedInstructionSets); + /// /// Add a supported instruction set to the specified list. 
/// diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index fb672605e1718..124d71e490a93 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -3,8 +3,12 @@ using System.Collections.Generic; using System.Diagnostics; +using System.Runtime.InteropServices; + using ILCompiler; + using Internal.TypeSystem; + using InstructionSet = Internal.JitInterface.InstructionSet; namespace System.CommandLine @@ -12,7 +16,7 @@ namespace System.CommandLine internal static partial class Helpers { public static InstructionSetSupport ConfigureInstructionSetSupport(string instructionSet, int maxVectorTBitWidth, bool isVectorTOptimistic, TargetArchitecture targetArchitecture, TargetOS targetOS, - string mustNotBeMessage, string invalidImplicationMessage) + string mustNotBeMessage, string invalidImplicationMessage, Logger logger) { InstructionSetSupportBuilder instructionSetSupportBuilder = new(targetArchitecture); @@ -34,7 +38,27 @@ internal static partial class Helpers } } - if (instructionSet != null) + if (instructionSet == "native") + { + if (GetTargetArchitecture(null) != targetArchitecture) + { + throw new CommandLineException("Instruction set 'native' not supported when cross-compiling to a different architecture."); + } + + string jitInterfaceLibrary = "jitinterface_" + RuntimeInformation.ProcessArchitecture.ToString().ToLowerInvariant(); + nint libHandle = NativeLibrary.Load(jitInterfaceLibrary, System.Reflection.Assembly.GetExecutingAssembly(), DllImportSearchPath.ApplicationDirectory); + int cpuFeatures; + unsafe + { + var getCpuFeatures = (delegate* unmanaged)NativeLibrary.GetExport(libHandle, "JitGetProcessorFeatures"); + cpuFeatures = getCpuFeatures(); + } + HardwareIntrinsicHelpers.AddRuntimeRequiredIsaFlagsToBuilder(instructionSetSupportBuilder, cpuFeatures); + + if (logger.IsVerbose) + logger.LogMessage($"The 'native' instruction set expanded to {instructionSetSupportBuilder}"); + } + else if (instructionSet != null) { List instructionSetParams = new List(); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs index cd7a616c0698b..ee355b964404d 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -81,168 +81,5 @@ public static int GetRuntimeRequiredIsaFlags(InstructionSetSupport instructionSe } return result; } - - // Keep these enumerations in sync with startup.cpp in the native runtime. 
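Stepping back to the `--instruction-set:native` handling added in `InstructionSetHelpers.cs` above: the managed code loads the `jitinterface_<architecture>` library and binds `JitGetProcessorFeatures` as an unmanaged, parameterless function pointer returning `int`. The native side of that contract is the one-line export this patch adds to `jitwrapper.cpp` (shown further down), reproduced here with its assumed includes for context:

```c
// The export the 'native' instruction-set path binds to; it simply forwards to the
// shared minipal probe. The <minipal/cpufeatures.h> include path is an assumption.
#include <minipal/cpufeatures.h>
#include "dllexport.h"   // DLL_EXPORT

DLL_EXPORT int JitGetProcessorFeatures()
{
    return minipal_getcpufeatures();
}
```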
- private static class XArchIntrinsicConstants - { - // SSE and SSE2 are baseline ISAs - they're always available - public const int Aes = 0x0001; - public const int Pclmulqdq = 0x0002; - public const int Sse3 = 0x0004; - public const int Ssse3 = 0x0008; - public const int Sse41 = 0x0010; - public const int Sse42 = 0x0020; - public const int Popcnt = 0x0040; - public const int Avx = 0x0080; - public const int Fma = 0x0100; - public const int Avx2 = 0x0200; - public const int Bmi1 = 0x0400; - public const int Bmi2 = 0x0800; - public const int Lzcnt = 0x1000; - public const int AvxVnni = 0x2000; - public const int Movbe = 0x4000; - public const int Avx512f = 0x8000; - public const int Avx512f_vl = 0x10000; - public const int Avx512bw = 0x20000; - public const int Avx512bw_vl = 0x40000; - public const int Avx512cd = 0x80000; - public const int Avx512cd_vl = 0x100000; - public const int Avx512dq = 0x200000; - public const int Avx512dq_vl = 0x400000; - public const int Avx512Vbmi = 0x800000; - public const int Avx512Vbmi_vl = 0x1000000; - public const int Serialize = 0x2000000; - public const int VectorT128 = 0x4000000; - public const int VectorT256 = 0x8000000; - public const int VectorT512 = 0x10000000; - - public static int FromInstructionSet(InstructionSet instructionSet) - { - Debug.Assert(InstructionSet.X64_AES == InstructionSet.X86_AES); - Debug.Assert(InstructionSet.X64_SSE41 == InstructionSet.X86_SSE41); - Debug.Assert(InstructionSet.X64_LZCNT == InstructionSet.X86_LZCNT); - - return instructionSet switch - { - // Optional ISAs - only available via opt-in or opportunistic light-up - InstructionSet.X64_AES => Aes, - InstructionSet.X64_AES_X64 => Aes, - InstructionSet.X64_PCLMULQDQ => Pclmulqdq, - InstructionSet.X64_PCLMULQDQ_X64 => Pclmulqdq, - InstructionSet.X64_SSE3 => Sse3, - InstructionSet.X64_SSE3_X64 => Sse3, - InstructionSet.X64_SSSE3 => Ssse3, - InstructionSet.X64_SSSE3_X64 => Ssse3, - InstructionSet.X64_SSE41 => Sse41, - InstructionSet.X64_SSE41_X64 => Sse41, - InstructionSet.X64_SSE42 => Sse42, - InstructionSet.X64_SSE42_X64 => Sse42, - InstructionSet.X64_POPCNT => Popcnt, - InstructionSet.X64_POPCNT_X64 => Popcnt, - InstructionSet.X64_AVX => Avx, - InstructionSet.X64_AVX_X64 => Avx, - InstructionSet.X64_FMA => Fma, - InstructionSet.X64_FMA_X64 => Fma, - InstructionSet.X64_AVX2 => Avx2, - InstructionSet.X64_AVX2_X64 => Avx2, - InstructionSet.X64_BMI1 => Bmi1, - InstructionSet.X64_BMI1_X64 => Bmi1, - InstructionSet.X64_BMI2 => Bmi2, - InstructionSet.X64_BMI2_X64 => Bmi2, - InstructionSet.X64_LZCNT => Lzcnt, - InstructionSet.X64_LZCNT_X64 => Lzcnt, - InstructionSet.X64_AVXVNNI => AvxVnni, - InstructionSet.X64_AVXVNNI_X64 => AvxVnni, - InstructionSet.X64_MOVBE => Movbe, - InstructionSet.X64_MOVBE_X64 => Movbe, - InstructionSet.X64_AVX512F => Avx512f, - InstructionSet.X64_AVX512F_X64 => Avx512f, - InstructionSet.X64_AVX512F_VL => Avx512f_vl, - InstructionSet.X64_AVX512F_VL_X64 => Avx512f_vl, - InstructionSet.X64_AVX512BW => Avx512bw, - InstructionSet.X64_AVX512BW_X64 => Avx512bw, - InstructionSet.X64_AVX512BW_VL => Avx512bw_vl, - InstructionSet.X64_AVX512BW_VL_X64 => Avx512bw_vl, - InstructionSet.X64_AVX512CD => Avx512cd, - InstructionSet.X64_AVX512CD_X64 => Avx512cd, - InstructionSet.X64_AVX512CD_VL => Avx512cd_vl, - InstructionSet.X64_AVX512CD_VL_X64 => Avx512cd_vl, - InstructionSet.X64_AVX512DQ => Avx512dq, - InstructionSet.X64_AVX512DQ_X64 => Avx512dq, - InstructionSet.X64_AVX512DQ_VL => Avx512dq_vl, - InstructionSet.X64_AVX512DQ_VL_X64 => Avx512dq_vl, - 
InstructionSet.X64_AVX512VBMI => Avx512Vbmi, - InstructionSet.X64_AVX512VBMI_X64 => Avx512Vbmi, - InstructionSet.X64_AVX512VBMI_VL => Avx512Vbmi_vl, - InstructionSet.X64_AVX512VBMI_VL_X64 => Avx512Vbmi_vl, - InstructionSet.X64_X86Serialize => Serialize, - InstructionSet.X64_X86Serialize_X64 => Serialize, - - // Baseline ISAs - they're always available - InstructionSet.X64_SSE => 0, - InstructionSet.X64_SSE_X64 => 0, - InstructionSet.X64_SSE2 => 0, - InstructionSet.X64_SSE2_X64 => 0, - - InstructionSet.X64_X86Base => 0, - InstructionSet.X64_X86Base_X64 => 0, - - // Vector Sizes - InstructionSet.X64_VectorT128 => VectorT128, - InstructionSet.X64_VectorT256 => VectorT256, - InstructionSet.X64_VectorT512 => VectorT512, - - _ => throw new NotSupportedException(((InstructionSet_X64)instructionSet).ToString()) - }; - } - } - - private static class Arm64IntrinsicConstants - { - public const int AdvSimd = 0x0001; - public const int Aes = 0x0002; - public const int Crc32 = 0x0004; - public const int Dp = 0x0008; - public const int Rdm = 0x0010; - public const int Sha1 = 0x0020; - public const int Sha256 = 0x0040; - public const int Atomics = 0x0080; - public const int Rcpc = 0x0100; - public const int VectorT128 = 0x0200; - - public static int FromInstructionSet(InstructionSet instructionSet) - { - return instructionSet switch - { - - // Baseline ISAs - they're always available - InstructionSet.ARM64_ArmBase => 0, - InstructionSet.ARM64_ArmBase_Arm64 => 0, - InstructionSet.ARM64_AdvSimd => AdvSimd, - InstructionSet.ARM64_AdvSimd_Arm64 => AdvSimd, - - // Optional ISAs - only available via opt-in or opportunistic light-up - InstructionSet.ARM64_Aes => Aes, - InstructionSet.ARM64_Aes_Arm64 => Aes, - InstructionSet.ARM64_Crc32 => Crc32, - InstructionSet.ARM64_Crc32_Arm64 => Crc32, - InstructionSet.ARM64_Dp => Dp, - InstructionSet.ARM64_Dp_Arm64 => Dp, - InstructionSet.ARM64_Rdm => Rdm, - InstructionSet.ARM64_Rdm_Arm64 => Rdm, - InstructionSet.ARM64_Sha1 => Sha1, - InstructionSet.ARM64_Sha1_Arm64 => Sha1, - InstructionSet.ARM64_Sha256 => Sha256, - InstructionSet.ARM64_Sha256_Arm64 => Sha256, - InstructionSet.ARM64_Atomics => Atomics, - InstructionSet.ARM64_Rcpc => Rcpc, - - // Vector Sizes - InstructionSet.ARM64_VectorT128 => VectorT128, - - _ => throw new NotSupportedException(((InstructionSet_ARM64)instructionSet).ToString()) - }; - } - } } } diff --git a/src/coreclr/tools/aot/ILCompiler/Program.cs b/src/coreclr/tools/aot/ILCompiler/Program.cs index 9a8593575a169..e8e80802788ad 100644 --- a/src/coreclr/tools/aot/ILCompiler/Program.cs +++ b/src/coreclr/tools/aot/ILCompiler/Program.cs @@ -66,6 +66,17 @@ public int Run() if (outputFilePath == null) throw new CommandLineException("Output filename must be specified (/out )"); + var suppressedWarningCategories = new List(); + if (Get(_command.NoTrimWarn)) + suppressedWarningCategories.Add(MessageSubCategory.TrimAnalysis); + if (Get(_command.NoAotWarn)) + suppressedWarningCategories.Add(MessageSubCategory.AotAnalysis); + + ILProvider ilProvider = new NativeAotILProvider(); + + var logger = new Logger(Console.Out, ilProvider, Get(_command.IsVerbose), ProcessWarningCodes(Get(_command.SuppressedWarnings)), + Get(_command.SingleWarn), Get(_command.SingleWarnEnabledAssemblies), Get(_command.SingleWarnDisabledAssemblies), suppressedWarningCategories); + // NativeAOT is full AOT and its pre-compiled methods can not be // thrown away at runtime if they mismatch in required ISAs or // computed layouts of structs. 
The worst case scenario is simply @@ -77,7 +88,7 @@ public int Run() TargetArchitecture targetArchitecture = Get(_command.TargetArchitecture); TargetOS targetOS = Get(_command.TargetOS); InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), Get(_command.MaxVectorTBitWidth), isVectorTOptimistic, targetArchitecture, targetOS, - "Unrecognized instruction set {0}", "Unsupported combination of instruction sets: {0}/{1}"); + "Unrecognized instruction set {0}", "Unsupported combination of instruction sets: {0}/{1}", logger); string systemModuleName = Get(_command.SystemModuleName); string reflectionData = Get(_command.ReflectionData); @@ -324,17 +335,6 @@ public int Run() PInvokeILEmitterConfiguration pinvokePolicy = new ConfigurablePInvokePolicy(typeSystemContext.Target, Get(_command.DirectPInvokes), Get(_command.DirectPInvokeLists)); - ILProvider ilProvider = new NativeAotILProvider(); - - var suppressedWarningCategories = new List(); - if (Get(_command.NoTrimWarn)) - suppressedWarningCategories.Add(MessageSubCategory.TrimAnalysis); - if (Get(_command.NoAotWarn)) - suppressedWarningCategories.Add(MessageSubCategory.AotAnalysis); - - var logger = new Logger(Console.Out, ilProvider, Get(_command.IsVerbose), ProcessWarningCodes(Get(_command.SuppressedWarnings)), - Get(_command.SingleWarn), Get(_command.SingleWarnEnabledAssemblies), Get(_command.SingleWarnDisabledAssemblies), suppressedWarningCategories); - var featureSwitches = new Dictionary(); foreach (var switchPair in Get(_command.FeatureSwitches)) { diff --git a/src/coreclr/tools/aot/crossgen2/Program.cs b/src/coreclr/tools/aot/crossgen2/Program.cs index f8e7239292ec3..74b8e4feb771c 100644 --- a/src/coreclr/tools/aot/crossgen2/Program.cs +++ b/src/coreclr/tools/aot/crossgen2/Program.cs @@ -74,6 +74,8 @@ public int Run() if (_singleFileCompilation && !_outNearInput) throw new CommandLineException(SR.MissingOutNearInput); + var logger = new Logger(Console.Out, Get(_command.IsVerbose)); + // Crossgen2 is partial AOT and its pre-compiled methods can be // thrown away at runtime if they mismatch in required ISAs or // computed layouts of structs. Thus we want to ensure that usage @@ -84,7 +86,7 @@ public int Run() TargetArchitecture targetArchitecture = Get(_command.TargetArchitecture); TargetOS targetOS = Get(_command.TargetOS); InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), Get(_command.MaxVectorTBitWidth), isVectorTOptimistic, targetArchitecture, targetOS, - SR.InstructionSetMustNotBe, SR.InstructionSetInvalidImplication); + SR.InstructionSetMustNotBe, SR.InstructionSetInvalidImplication, logger); SharedGenericsMode genericsMode = SharedGenericsMode.CanonicalReferenceTypes; var targetDetails = new TargetDetails(targetArchitecture, targetOS, Crossgen2RootCommand.IsArmel ? 
TargetAbi.NativeAotArmel : TargetAbi.NativeAot, instructionSetSupport.GetVectorTSimdVector()); @@ -279,7 +281,7 @@ public int Run() typeSystemContext.SetSystemModule((EcmaModule)typeSystemContext.GetModuleForSimpleName(systemModuleName)); } - RunSingleCompilation(singleCompilationInputFilePaths, instructionSetSupport, compositeRootPath, unrootedInputFilePaths, singleCompilationVersionBubbleModulesHash, typeSystemContext); + RunSingleCompilation(singleCompilationInputFilePaths, instructionSetSupport, compositeRootPath, unrootedInputFilePaths, singleCompilationVersionBubbleModulesHash, typeSystemContext, logger); } // In case of inputbubble ni.dll are created as ni.dll.tmp in order to not interfere with crossgen2, move them all to ni.dll @@ -297,13 +299,13 @@ public int Run() } else { - RunSingleCompilation(inputFilePaths, instructionSetSupport, compositeRootPath, unrootedInputFilePaths, versionBubbleModulesHash, typeSystemContext); + RunSingleCompilation(inputFilePaths, instructionSetSupport, compositeRootPath, unrootedInputFilePaths, versionBubbleModulesHash, typeSystemContext, logger); } return 0; } - private void RunSingleCompilation(Dictionary inFilePaths, InstructionSetSupport instructionSetSupport, string compositeRootPath, Dictionary unrootedInputFilePaths, HashSet versionBubbleModulesHash, ReadyToRunCompilerContext typeSystemContext) + private void RunSingleCompilation(Dictionary inFilePaths, InstructionSetSupport instructionSetSupport, string compositeRootPath, Dictionary unrootedInputFilePaths, HashSet versionBubbleModulesHash, ReadyToRunCompilerContext typeSystemContext, Logger logger) { // // Initialize output filename @@ -384,8 +386,6 @@ private void RunSingleCompilation(Dictionary inFilePaths, Instru // Single method mode? MethodDesc singleMethod = CheckAndParseSingleMethodModeArguments(typeSystemContext); - var logger = new Logger(Console.Out, Get(_command.IsVerbose)); - List mibcFiles = new List(); foreach (var file in Get(_command.MibcFilePaths)) { diff --git a/src/coreclr/tools/aot/jitinterface/CMakeLists.txt b/src/coreclr/tools/aot/jitinterface/CMakeLists.txt index 0b1cdc33fd591..6a612e1486985 100644 --- a/src/coreclr/tools/aot/jitinterface/CMakeLists.txt +++ b/src/coreclr/tools/aot/jitinterface/CMakeLists.txt @@ -5,11 +5,16 @@ set(NATIVE_SOURCES jitinterface.cpp jitwrapper.cpp corinfoexception.cpp + + ${CLR_SRC_NATIVE_DIR}/minipal/cpufeatures.c ) if(CLR_CMAKE_TARGET_WIN32) set(JITINTERFACE_RESOURCES Native.rc) -endif(CLR_CMAKE_TARGET_WIN32) +else() + include(${CLR_SRC_NATIVE_DIR}/minipal/configure.cmake) + include_directories(${CMAKE_CURRENT_BINARY_DIR}) +endif() add_library_clr(jitinterface_${ARCH_HOST_NAME} SHARED diff --git a/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp b/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp index 1a091b2fab197..8fd38d192f84e 100644 --- a/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp +++ b/src/coreclr/tools/aot/jitinterface/jitwrapper.cpp @@ -5,6 +5,8 @@ #include #include +#include + #include "dllexport.h" #include "jitinterface_generated.h" @@ -50,3 +52,8 @@ DLL_EXPORT void JitProcessShutdownWork(ICorJitCompiler * pJit) { return pJit->ProcessShutdownWork(nullptr); } + +DLL_EXPORT int JitGetProcessorFeatures() +{ + return minipal_getcpufeatures(); +} diff --git a/src/native/minipal/configure.cmake b/src/native/minipal/configure.cmake new file mode 100644 index 0000000000000..c7b7715c1d78a --- /dev/null +++ b/src/native/minipal/configure.cmake @@ -0,0 +1,7 @@ +include(CheckFunctionExists) +include(CheckIncludeFiles) + 
+check_include_files("sys/auxv.h;asm/hwcap.h" HAVE_AUXV_HWCAP_H) +check_function_exists(sysctlbyname HAVE_SYSCTLBYNAME) + +configure_file(${CMAKE_CURRENT_LIST_DIR}/minipalconfig.h.in ${CMAKE_CURRENT_BINARY_DIR}/minipalconfig.h) diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c new file mode 100644 index 0000000000000..b91b37bf88580 --- /dev/null +++ b/src/native/minipal/cpufeatures.c @@ -0,0 +1,573 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include +#include +#include + +#include "cpufeatures.h" +#include "cpuid.h" + +#if TARGET_WINDOWS + +#include + +#else // TARGET_WINDOWS + +#include "minipalconfig.h" + +#if HAVE_AUXV_HWCAP_H +#include +#include +#endif + +#if HAVE_SYSCTLBYNAME +#include +#endif + +#endif // !TARGET_WINDOWS + +#if defined(TARGET_UNIX) +#if defined(TARGET_X86) || defined(TARGET_AMD64) + +static uint32_t xmmYmmStateSupport() +{ + uint32_t eax; + __asm(" xgetbv\n" \ + : "=a"(eax) /*output in eax*/\ + : "c"(0) /*inputs - 0 in ecx*/\ + : "edx" /* registers that are clobbered*/ + ); + // check OS has enabled both XMM and YMM state support + return ((eax & 0x06) == 0x06) ? 1 : 0; +} + +#ifndef XSTATE_MASK_AVX512 +#define XSTATE_MASK_AVX512 (0xE0) /* 0b1110_0000 */ +#endif // XSTATE_MASK_AVX512 + +static uint32_t avx512StateSupport() +{ +#if defined(TARGET_APPLE) + // MacOS has specialized behavior where it reports AVX512 support but doesnt + // actually enable AVX512 until the first instruction is executed and does so + // on a per thread basis. It does this by catching the faulting instruction and + // checking for the EVEX encoding. The kmov instructions, despite being part + // of the AVX512 instruction set are VEX encoded and dont trigger the enablement + // + // See https://github.com/apple/darwin-xnu/blob/main/osfmk/i386/fpu.c#L174 + + // TODO-AVX512: Enabling this for OSX requires ensuring threads explicitly trigger + // the AVX-512 enablement so that arbitrary usage doesn't cause downstream problems + + return false; +#else + uint32_t eax; + __asm(" xgetbv\n" \ + : "=a"(eax) /*output in eax*/\ + : "c"(0) /*inputs - 0 in ecx*/\ + : "edx" /* registers that are clobbered*/ + ); + // check OS has enabled XMM, YMM and ZMM state support + return ((eax & 0xE6) == 0x0E6) ? 1 : 0; +#endif +} + +static bool IsAvxEnabled() +{ + return true; +} + +static bool IsAvx512Enabled() +{ + return true; +} +#endif // defined(TARGET_X86) || defined(TARGET_AMD64) +#endif // TARGET_UNIX + +#if defined(TARGET_WINDOWS) +#if defined(TARGET_X86) || defined(TARGET_AMD64) +static uint32_t xmmYmmStateSupport() +{ + // check OS has enabled both XMM and YMM state support + return ((_xgetbv(0) & 0x06) == 0x06) ? 1 : 0; +} + +static uint32_t avx512StateSupport() +{ + // check OS has enabled XMM, YMM and ZMM state support + return ((_xgetbv(0) & 0xE6) == 0x0E6) ? 
1 : 0; +} + +static HMODULE LoadKernel32dll() +{ + return LoadLibraryExW(L"kernel32", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); +} + +static bool IsAvxEnabled() +{ + typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)(); + PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; + + HMODULE hMod = LoadKernel32dll(); + if (hMod == NULL) + return FALSE; + + pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures"); + + if (pfnGetEnabledXStateFeatures == NULL) + { + return FALSE; + } + + DWORD64 FeatureMask = pfnGetEnabledXStateFeatures(); + if ((FeatureMask & XSTATE_MASK_AVX) == 0) + { + return FALSE; + } + + return TRUE; +} + +static bool IsAvx512Enabled() +{ + typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)(); + PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; + + HMODULE hMod = LoadKernel32dll(); + if (hMod == NULL) + return FALSE; + + pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures"); + + if (pfnGetEnabledXStateFeatures == NULL) + { + return FALSE; + } + + DWORD64 FeatureMask = pfnGetEnabledXStateFeatures(); + if ((FeatureMask & XSTATE_MASK_AVX512) == 0) + { + return FALSE; + } + + return TRUE; +} +#endif // defined(TARGET_X86) || defined(TARGET_AMD64) +#endif // TARGET_WINDOWS + +int minipal_getcpufeatures(void) +{ + int result = 0; + +#if defined(TARGET_X86) || defined(TARGET_AMD64) + + int cpuidInfo[4]; + + const int CPUID_EAX = 0; + const int CPUID_EBX = 1; + const int CPUID_ECX = 2; + const int CPUID_EDX = 3; + + __cpuid(cpuidInfo, 0x00000000); + uint32_t maxCpuId = (uint32_t)cpuidInfo[CPUID_EAX]; + + if (maxCpuId >= 1) + { + __cpuid(cpuidInfo, 0x00000001); + + const int requiredBaselineEdxFlags = (1 << 25) // SSE + | (1 << 26); // SSE2 + + if ((cpuidInfo[CPUID_EDX] & requiredBaselineEdxFlags) == requiredBaselineEdxFlags) + { + result |= XArchIntrinsicConstants_VectorT128; + + if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI + { + result |= XArchIntrinsicConstants_Aes; + } + + if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ + { + result |= XArchIntrinsicConstants_Pclmulqdq; + } + + if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3 + { + result |= XArchIntrinsicConstants_Sse3; + + if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3 + { + result |= XArchIntrinsicConstants_Ssse3; + + if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) // SSE4.1 + { + result |= XArchIntrinsicConstants_Sse41; + + if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) // SSE4.2 + { + result |= XArchIntrinsicConstants_Sse42; + + if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE + { + result |= XArchIntrinsicConstants_Movbe; + } + + if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT + { + result |= XArchIntrinsicConstants_Popcnt; + } + + const int requiredAvxEcxFlags = (1 << 27) // OSXSAVE + | (1 << 28); // AVX + + if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags) + { + if (IsAvxEnabled() && (xmmYmmStateSupport() == 1)) // XGETBV == 11 + { + result |= XArchIntrinsicConstants_Avx; + + if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA + { + result |= XArchIntrinsicConstants_Fma; + } + + if (maxCpuId >= 0x07) + { + __cpuidex(cpuidInfo, 0x00000007, 0x00000000); + + if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 + { + result |= XArchIntrinsicConstants_Avx2; + result |= XArchIntrinsicConstants_VectorT256; + + if (IsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 + { + if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F + { + 
result |= XArchIntrinsicConstants_Avx512f; + result |= XArchIntrinsicConstants_VectorT512; + + bool isAVX512_VLSupported = false; + if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL + { + result |= XArchIntrinsicConstants_Avx512f_vl; + isAVX512_VLSupported = true; + } + + if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW + { + result |= XArchIntrinsicConstants_Avx512bw; + if (isAVX512_VLSupported) // AVX512BW_VL + { + result |= XArchIntrinsicConstants_Avx512bw_vl; + } + } + + if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD + { + result |= XArchIntrinsicConstants_Avx512cd; + if (isAVX512_VLSupported) // AVX512CD_VL + { + result |= XArchIntrinsicConstants_Avx512cd_vl; + } + } + + if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ + { + result |= XArchIntrinsicConstants_Avx512dq; + if (isAVX512_VLSupported) // AVX512DQ_VL + { + result |= XArchIntrinsicConstants_Avx512dq_vl; + } + } + + if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI + { + result |= XArchIntrinsicConstants_Avx512Vbmi; + if (isAVX512_VLSupported) // AVX512VBMI_VL + { + result |= XArchIntrinsicConstants_Avx512Vbmi_vl; + } + } + } + } + + __cpuidex(cpuidInfo, 0x00000007, 0x00000001); + + if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI + { + result |= XArchIntrinsicConstants_AvxVnni; + } + } + } + } + } + } + } + } + } + } + + if (maxCpuId >= 0x07) + { + __cpuidex(cpuidInfo, 0x00000007, 0x00000000); + + if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1 + { + result |= XArchIntrinsicConstants_Bmi1; + } + + if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2 + { + result |= XArchIntrinsicConstants_Bmi2; + } + + if ((cpuidInfo[CPUID_EDX] & (1 << 14)) != 0) + { + result |= XArchIntrinsicConstants_Serialize; // SERIALIZE + } + } + } + + __cpuid(cpuidInfo, 0x80000000); + uint32_t maxCpuIdEx = (uint32_t)cpuidInfo[CPUID_EAX]; + + if (maxCpuIdEx >= 0x80000001) + { + __cpuid(cpuidInfo, 0x80000001); + + if ((cpuidInfo[CPUID_ECX] & (1 << 5)) != 0) // LZCNT + { + result |= XArchIntrinsicConstants_Lzcnt; + } + + } +#endif // TARGET_X86 || TARGET_AMD64 + +#if defined(TARGET_ARM64) +#if defined(TARGET_UNIX) + #if HAVE_AUXV_HWCAP_H + unsigned long hwCap = getauxval(AT_HWCAP); + + // HWCAP_* flags are introduced by ARM into the Linux kernel as new extensions are published. + // For a given kernel, some of these flags may not be present yet. + // Use ifdef for each to allow for compilation with any vintage kernel. + // From a single binary distribution perspective, compiling with latest kernel asm/hwcap.h should + // include all published flags. Given flags are merged to kernel and published before silicon is + // available, using the latest kernel for release should be sufficient. 
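+    // (Each HWCAP_* check below is wrapped in #ifdef, so a flag that is missing
+    // from older kernel headers simply compiles out.)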
+ +#ifdef HWCAP_AES + if (hwCap & HWCAP_AES) + result |= ARM64IntrinsicConstants_Aes; +#endif +#ifdef HWCAP_ATOMICS + if (hwCap & HWCAP_ATOMICS) + result |= ARM64IntrinsicConstants_Atomics; +#endif +#ifdef HWCAP_CRC32 + if (hwCap & HWCAP_CRC32) + result |= ARM64IntrinsicConstants_Crc32; +#endif +#ifdef HWCAP_DCPOP +// if (hwCap & HWCAP_DCPOP) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP_ASIMDDP + if (hwCap & HWCAP_ASIMDDP) + result |= ARM64IntrinsicConstants_Dp; +#endif +#ifdef HWCAP_FCMA +// if (hwCap & HWCAP_FCMA) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP_FP +// if (hwCap & HWCAP_FP) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP_FPHP +// if (hwCap & HWCAP_FPHP) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP_JSCVT +// if (hwCap & HWCAP_JSCVT) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP_LRCPC + if (hwCap & HWCAP_LRCPC) + result |= ARM64IntrinsicConstants_Rcpc; +#endif +#ifdef HWCAP_PMULL +// if (hwCap & HWCAP_PMULL) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP_SHA1 + if (hwCap & HWCAP_SHA1) + result |= ARM64IntrinsicConstants_Sha1; +#endif +#ifdef HWCAP_SHA2 + if (hwCap & HWCAP_SHA2) + result |= ARM64IntrinsicConstants_Sha256; +#endif +#ifdef HWCAP_SHA512 +// if (hwCap & HWCAP_SHA512) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP_SHA3 +// if (hwCap & HWCAP_SHA3) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP_ASIMD + if (hwCap & HWCAP_ASIMD) + result |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; +#endif +#ifdef HWCAP_ASIMDRDM + if (hwCap & HWCAP_ASIMDRDM) + result |= ARM64IntrinsicConstants_Rdm; +#endif +#ifdef HWCAP_ASIMDHP +// if (hwCap & HWCAP_ASIMDHP) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP_SM3 +// if (hwCap & HWCAP_SM3) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP_SM4 +// if (hwCap & HWCAP_SM4) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP_SVE +// if (hwCap & HWCAP_SVE) +// result |= ARM64IntrinsicConstants_???; +#endif + +#ifdef AT_HWCAP2 + unsigned long hwCap2 = getauxval(AT_HWCAP2); + +#ifdef HWCAP2_DCPODP +// if (hwCap2 & HWCAP2_DCPODP) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP2_SVE2 +// if (hwCap2 & HWCAP2_SVE2) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP2_SVEAES +// if (hwCap2 & HWCAP2_SVEAES) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP2_SVEPMULL +// if (hwCap2 & HWCAP2_SVEPMULL) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP2_SVEBITPERM +// if (hwCap2 & HWCAP2_SVEBITPERM) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP2_SVESHA3 +// if (hwCap2 & HWCAP2_SVESHA3) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP2_SVESM4 +// if (hwCap2 & HWCAP2_SVESM4) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP2_FLAGM2 +// if (hwCap2 & HWCAP2_FLAGM2) +// result |= ARM64IntrinsicConstants_???; +#endif +#ifdef HWCAP2_FRINT +// if (hwCap2 & HWCAP2_FRINT) +// result |= ARM64IntrinsicConstants_???; +#endif + +#endif // AT_HWCAP2 + +#else // !HAVE_AUXV_HWCAP_H + +#if HAVE_SYSCTLBYNAME + int64_t valueFromSysctl = 0; + size_t sz = sizeof(valueFromSysctl); + + if ((sysctlbyname("hw.optional.arm.FEAT_AES", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0)) + result |= ARM64IntrinsicConstants_Aes; + + if ((sysctlbyname("hw.optional.armv8_crc32", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0)) + result 
|= ARM64IntrinsicConstants_Crc32;
+
+    if ((sysctlbyname("hw.optional.arm.FEAT_DotProd", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0))
+        result |= ARM64IntrinsicConstants_Dp;
+
+    if ((sysctlbyname("hw.optional.arm.FEAT_RDM", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0))
+        result |= ARM64IntrinsicConstants_Rdm;
+
+    if ((sysctlbyname("hw.optional.arm.FEAT_SHA1", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0))
+        result |= ARM64IntrinsicConstants_Sha1;
+
+    if ((sysctlbyname("hw.optional.arm.FEAT_SHA256", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0))
+        result |= ARM64IntrinsicConstants_Sha256;
+
+    if ((sysctlbyname("hw.optional.armv8_1_atomics", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0))
+        result |= ARM64IntrinsicConstants_Atomics;
+
+    if ((sysctlbyname("hw.optional.arm.FEAT_LRCPC", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0))
+        result |= ARM64IntrinsicConstants_Rcpc;
+#endif // HAVE_SYSCTLBYNAME
+
+    // Every ARM64 CPU should support SIMD and FP.
+    // If the OS has no function to query for CPU capabilities, we set just these.
+
+    result |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128;
+#endif // HAVE_AUXV_HWCAP_H
+#endif // TARGET_UNIX
+
+#if defined(TARGET_WINDOWS)
+// Older versions of the SDK would return false for these intrinsics,
+// but make sure we pass the right values to the APIs.
+#ifndef PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE 34
+#endif
+#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
+#endif
+#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45
+#endif
+
+    // FP and SIMD support are enabled by default
+    result |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128;
+
+    if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
+    {
+        result |= ARM64IntrinsicConstants_Aes;
+        result |= ARM64IntrinsicConstants_Sha1;
+        result |= ARM64IntrinsicConstants_Sha256;
+    }
+
+    if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
+    {
+        result |= ARM64IntrinsicConstants_Crc32;
+    }
+
+    if (IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE))
+    {
+        result |= ARM64IntrinsicConstants_Atomics;
+    }
+
+    if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
+    {
+        result |= ARM64IntrinsicConstants_Dp;
+    }
+
+    if (IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE))
+    {
+        result |= ARM64IntrinsicConstants_Rcpc;
+    }
+#endif // TARGET_WINDOWS
+
+#endif // TARGET_ARM64
+
+    return result;
+}
diff --git a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h b/src/native/minipal/cpufeatures.h
similarity index 84%
rename from src/coreclr/nativeaot/Runtime/IntrinsicConstants.h
rename to src/native/minipal/cpufeatures.h
index 41ec8dec9c3d0..312bee84ace2b 100644
--- a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h
+++ b/src/native/minipal/cpufeatures.h
@@ -1,12 +1,14 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
-#ifndef INTRINSICCONSTANTS_INCLUDED -#define INTRINSICCONSTANTS_INCLUDED +#ifndef HAVE_MINIPAL_CPUFEATURES_H +#define HAVE_MINIPAL_CPUFEATURES_H -// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.Aot.cs +// +// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.cs +// -#if defined(HOST_X86) || defined(HOST_AMD64) +#if defined(TARGET_X86) || defined(TARGET_AMD64) enum XArchIntrinsicConstants { XArchIntrinsicConstants_Aes = 0x0001, @@ -39,9 +41,9 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_VectorT256 = 0x8000000, XArchIntrinsicConstants_VectorT512 = 0x10000000, }; -#endif //HOST_X86 || HOST_AMD64 +#endif // TARGET_X86 || TARGET_AMD64 -#if defined(HOST_ARM64) +#if defined(TARGET_ARM64) enum ARM64IntrinsicConstants { ARM64IntrinsicConstants_AdvSimd = 0x0001, @@ -56,10 +58,23 @@ enum ARM64IntrinsicConstants ARM64IntrinsicConstants_VectorT128 = 0x0200, }; +#include + // Bit position for the ARM64IntrinsicConstants_Atomics flags, to be used with tbz / tbnz instructions -static const int ARM64_ATOMICS_FEATURE_FLAG_BIT = 7; +#define ARM64_ATOMICS_FEATURE_FLAG_BIT 7 static_assert((1 << ARM64_ATOMICS_FEATURE_FLAG_BIT) == ARM64IntrinsicConstants_Atomics, "ARM64_ATOMICS_FEATURE_FLAG_BIT must match with ARM64IntrinsicConstants_Atomics"); -#endif //HOST_ARM64 +#endif // TARGET_ARM64 + +#ifdef __cplusplus +extern "C" +{ +#endif // __cplusplus + +int minipal_getcpufeatures(void); + +#ifdef __cplusplus +} +#endif // __cplusplus -#endif //!INTRINSICCONSTANTS_INCLUDED +#endif diff --git a/src/native/minipal/cpuid.h b/src/native/minipal/cpuid.h new file mode 100644 index 0000000000000..b8a6bd23b65f4 --- /dev/null +++ b/src/native/minipal/cpuid.h @@ -0,0 +1,54 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef HAVE_MINIPAL_CPUID_H +#define HAVE_MINIPAL_CPUID_H + +#if defined(TARGET_X86) || defined(TARGET_AMD64) + +#if defined(TARGET_WINDOWS) + +#include + +#endif // TARGET_WINDOWS + +#if defined(TARGET_UNIX) +// MSVC directly defines intrinsics for __cpuid and __cpuidex matching the below signatures +// We define matching signatures for use on Unix platforms. 
+// +// IMPORTANT: Unlike MSVC, Unix does not explicitly zero ECX for __cpuid + +#if !__has_builtin(__cpuid) +static void __cpuid(int cpuInfo[4], int function_id) +{ + // Based on the Clang implementation provided in cpuid.h: + // https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/cpuid.h + + __asm(" cpuid\n" \ + : "=a"(cpuInfo[0]), "=b"(cpuInfo[1]), "=c"(cpuInfo[2]), "=d"(cpuInfo[3]) \ + : "0"(function_id) + ); +} +#else +void __cpuid(int cpuInfo[4], int function_id); +#endif + +#if !__has_builtin(__cpuidex) +static void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id) +{ + // Based on the Clang implementation provided in cpuid.h: + // https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/cpuid.h + + __asm(" cpuid\n" \ + : "=a"(cpuInfo[0]), "=b"(cpuInfo[1]), "=c"(cpuInfo[2]), "=d"(cpuInfo[3]) \ + : "0"(function_id), "2"(subFunction_id) + ); +} +#else +void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id); +#endif + +#endif // TARGET_UNIX +#endif // defined(TARGET_X86) || defined(TARGET_AMD64) + +#endif diff --git a/src/native/minipal/minipalconfig.h.in b/src/native/minipal/minipalconfig.h.in new file mode 100644 index 0000000000000..b84247632ab00 --- /dev/null +++ b/src/native/minipal/minipalconfig.h.in @@ -0,0 +1,7 @@ +#ifndef HAVE_MINIPAL_MINIPALCONFIG_H +#define HAVE_MINIPAL_MINIPALCONFIG_H + +#cmakedefine01 HAVE_AUXV_HWCAP_H +#cmakedefine01 HAVE_SYSCTLBYNAME + +#endif
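
For reviewers who want to exercise the new probing code outside the runtime, here is a minimal stand-alone sketch; it is hypothetical and not part of this change. It assumes an x64 build in which cpufeatures.c is compiled and linked next to it with TARGET_AMD64 (plus TARGET_UNIX or TARGET_WINDOWS) defined and src/native/minipal on the include path, so that minipal_getcpufeatures() and the XArchIntrinsicConstants_* flags from cpufeatures.h are visible.

// Hypothetical harness, not part of this patch; build assumptions are described above.
#include <stdio.h>

#include "cpufeatures.h"

int main(void)
{
    // Query the host CPU once; each set bit corresponds to one XArchIntrinsicConstants_* value.
    int features = minipal_getcpufeatures();

    printf("feature mask: 0x%08x\n", (unsigned int)features);
    printf("popcnt : %s\n", (features & XArchIntrinsicConstants_Popcnt)  ? "yes" : "no");
    printf("avx2   : %s\n", (features & XArchIntrinsicConstants_Avx2)    ? "yes" : "no");
    printf("avx512f: %s\n", (features & XArchIntrinsicConstants_Avx512f) ? "yes" : "no");
    return 0;
}

This mirrors how the compiler consumes the value: the new JitGetProcessorFeatures() export simply forwards to minipal_getcpufeatures(), and the returned bit mask is what the --instruction-set:native handling builds on when deciding which instruction sets to enable.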