Skip to content

Commit d3fa1f2

Browse files
authored
Add support to detect Neoverse V2 cores (#1706)
### Description of changes:
Graviton 4 uses Neoverse V2 cores, which we were previously not detecting; as a result, poorly performing implementations were being used. The code still produced the correct answer, it just didn't take full advantage of the CPU's capabilities.

### Call-outs:
I thought about combining the ARM CPU capability flags for Neoverse V1 and V2 cores into one flag, but we might have a future use case that would need specific handling, even though right now they behave the same. We also need to test Apple M2 and M3 CPUs, which will probably also want the same optimizations as M1. It might also make sense to combine these into a single Apple "M" ARM capability flag.

### Testing:
Built and ran locally. On a Graviton 4 instance:

|Algorithm|Before|After|
|---|---|---|
|RSA 2048 sign|929.6 ops/sec|1,397.5 ops/sec|
|ECDH P-384|3,541.2 ops/sec|3,744.8 ops/sec|
|ECDH P-521|1,885.1 ops/sec|2,406.7 ops/sec|
|AES 256 GCM 16 bytes|204.4 MB/s|203.9 MB/s|
|AES 256 GCM 16 KB|4,500.5 MB/s|6,019.0 MB/s|

By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license and the ISC license.
1 parent 10f73d0 commit d3fa1f2

File tree

4 files changed

+12
-2
lines changed

4 files changed

+12
-2
lines changed

crypto/fipsmodule/cpucap/cpu_aarch64_linux.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,15 @@ void OPENSSL_cpuid_setup(void) {
8080
// is supported. As of Valgrind 3.21 trying to read from that register will
8181
// cause Valgrind to crash.
8282
if (hwcap & kCPUID) {
83-
// Check if the CPU model is Neoverse V1,
83+
// Check if the CPU model is Neoverse V1 or V2,
8484
// which has a wide crypto/SIMD pipeline.
8585
OPENSSL_arm_midr = armv8_cpuid_probe();
8686
if (MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1)) {
8787
OPENSSL_armcap_P |= ARMV8_NEOVERSE_V1;
8888
}
89+
if (MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V2)) {
90+
OPENSSL_armcap_P |= ARMV8_NEOVERSE_V2;
91+
}
8992
}
9093

9194
// OPENSSL_armcap is a 32-bit, unsigned value which may start with "0x" to

crypto/fipsmodule/cpucap/cpucap.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ HIDDEN uint32_t OPENSSL_armcap_P =
7474
#endif
7575
#if defined(OPENSSL_STATIC_ARMCAP_NEOVERSE_V1) || defined(__ARM_FEATURE_NEOVERSE_V1)
7676
ARMV8_NEOVERSE_V1 |
77+
#endif
78+
#if defined(OPENSSL_STATIC_ARMCAP_NEOVERSE_V2) || defined(__ARM_FEATURE_NEOVERSE_V2)
79+
ARMV8_NEOVERSE_V2 |
7780
#endif
7881
0;
7982

crypto/fipsmodule/cpucap/internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,11 +186,13 @@ OPENSSL_INLINE int CRYPTO_is_ARMv8_PMULL_capable(void) {
186186
// CRYPTO_is_ARMv8_GCM_8x_capable returns one when |OPENSSL_armcap_P| has the
// ARMV8_SHA3 bit set AND at least one of the micro-architecture bits
// ARMV8_NEOVERSE_V1, ARMV8_NEOVERSE_V2 or ARMV8_APPLE_M1 set; otherwise it
// returns zero. Takes no arguments and only reads |OPENSSL_armcap_P|.
// NOTE(review): judging by the name, this presumably gates the 8x-unrolled
// AES-GCM code path -- confirm against the callers.
OPENSSL_INLINE int CRYPTO_is_ARMv8_GCM_8x_capable(void) {
187187
return ((OPENSSL_armcap_P & ARMV8_SHA3) != 0 &&
188188
((OPENSSL_armcap_P & ARMV8_NEOVERSE_V1) != 0 ||
189+
(OPENSSL_armcap_P & ARMV8_NEOVERSE_V2) != 0 ||
189190
(OPENSSL_armcap_P & ARMV8_APPLE_M1) != 0));
190191
}
191192

192193
// CRYPTO_is_ARMv8_wide_multiplier_capable returns one when any of the
// ARMV8_NEOVERSE_V1, ARMV8_NEOVERSE_V2 or ARMV8_APPLE_M1 bits is set in
// |OPENSSL_armcap_P|, and zero otherwise. Unlike the GCM 8x check, no
// instruction-set feature bit (such as ARMV8_SHA3) is required here.
// NOTE(review): judging by the name, this presumably selects implementations
// tuned for cores with a fast/wide integer multiplier -- confirm against the
// callers.
OPENSSL_INLINE int CRYPTO_is_ARMv8_wide_multiplier_capable(void) {
193194
return (OPENSSL_armcap_P & ARMV8_NEOVERSE_V1) != 0 ||
195+
(OPENSSL_armcap_P & ARMV8_NEOVERSE_V2) != 0 ||
194196
(OPENSSL_armcap_P & ARMV8_APPLE_M1) != 0;
195197
}
196198

include/openssl/arm_arch.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,12 @@
8282
// ARMV8_SHA3 indicates support for hardware SHA-3 instructions including EOR3.
8383
#define ARMV8_SHA3 (1 << 11)
8484

85-
// The Neoverse V1 and Apple M1 micro-architectures are detected to enable
85+
// The Neoverse V1, V2, and Apple M1 micro-architectures are detected to enable
8686
// high unrolling factor of AES-GCM and other algorithms that leverage a
8787
// wide crypto pipeline and fast multiplier.
8888
#define ARMV8_NEOVERSE_V1 (1 << 12)
8989
#define ARMV8_APPLE_M1 (1 << 13)
90+
#define ARMV8_NEOVERSE_V2 (1 << 14)
9091

9192
//
9293
// MIDR_EL1 system register
@@ -102,6 +103,7 @@
102103
# define ARM_CPU_PART_CORTEX_A72 0xD08
103104
# define ARM_CPU_PART_N1 0xD0C
104105
# define ARM_CPU_PART_V1 0xD40
106+
# define ARM_CPU_PART_V2 0xD4F
105107

106108
# define MIDR_PARTNUM_SHIFT 4
107109
# define MIDR_PARTNUM_MASK (0xfffUL << MIDR_PARTNUM_SHIFT)

0 commit comments

Comments
 (0)