From 7899413e4b4e17d424ae8fad23e262f545dab16b Mon Sep 17 00:00:00 2001
From: Christian Trott
Date: Thu, 15 Feb 2024 16:27:46 -0700
Subject: [PATCH] Add Grace CPU architecture

Add ARMv9-Grace / ARMV9_GRACE as a selectable host architecture in both
build systems:

* Makefile.kokkos: list ARMv9-Grace among the ARM options, detect it in
  KOKKOS_ARCH, count it in KOKKOS_INTERNAL_USE_ARCH_ARM, and when it is
  selected append the KOKKOS_ARCH_ARMV9_GRACE and KOKKOS_ARCH_ARM_NEON
  defines via kokkos_append_header and add
  "-mcpu=neoverse-v2 -msve-vector-bits=128" to KOKKOS_CXXFLAGS and
  KOKKOS_LDFLAGS.
* cmake/kokkos_arch.cmake: declare the ARMV9_GRACE host architecture and,
  when it is selected, set KOKKOS_ARCH_ARM_NEON to ON and pass the same
  two flags as the DEFAULT entry of COMPILER_SPECIFIC_FLAGS.

---
NOTE(review): the indentation and column alignment of the context lines
below could not be confirmed against the target tree. If the patch does
not apply cleanly, retry with `git apply --ignore-whitespace` (or
`patch -l`) and verify the result.

 Makefile.kokkos         | 13 +++++++++++--
 cmake/kokkos_arch.cmake |  9 +++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/Makefile.kokkos b/Makefile.kokkos
index c7edfdf1aa4..9cdc70c6a0f 100644
--- a/Makefile.kokkos
+++ b/Makefile.kokkos
@@ -11,7 +11,7 @@ KOKKOS_DEVICES ?= "Threads"
 # Options:
 # Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR
 # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
-# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
+# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
 # IBM: Power8,Power9
 # AMD-GPUS: GFX906,GFX908,GFX90A,GFX940,GFX942,GFX1030,GFX1100
 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
@@ -384,7 +384,8 @@ KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8
 KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX)
 KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2)
 KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX)
-KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc))
+KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv9-Grace)
+KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) +$(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE) | bc))
 
 # IBM based.
 KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8)
@@ -731,6 +732,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1)
   endif
 endif
 
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE), 1)
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV9_GRACE")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON")
+
+  KOKKOS_CXXFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128
+  KOKKOS_LDFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1)
   tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN")
   tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
diff --git a/cmake/kokkos_arch.cmake b/cmake/kokkos_arch.cmake
index 5ee6f44613e..a69865de246 100644
--- a/cmake/kokkos_arch.cmake
+++ b/cmake/kokkos_arch.cmake
@@ -49,6 +49,7 @@ DECLARE_AND_CHECK_HOST_ARCH(ARMV81 "ARMv8.1 Compatible CPU")
 DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX "ARMv8 Cavium ThunderX CPU")
 DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX2 "ARMv8 Cavium ThunderX2 CPU")
 DECLARE_AND_CHECK_HOST_ARCH(A64FX "ARMv8.2 with SVE Support")
+DECLARE_AND_CHECK_HOST_ARCH(ARMV9_GRACE "ARMv9 NVIDIA Grace CPU")
 DECLARE_AND_CHECK_HOST_ARCH(SNB "Intel Sandy/Ivy Bridge CPUs")
 DECLARE_AND_CHECK_HOST_ARCH(HSW "Intel Haswell CPUs")
 DECLARE_AND_CHECK_HOST_ARCH(BDW "Intel Broadwell Xeon E-class CPUs")
@@ -301,6 +302,14 @@ IF (KOKKOS_ARCH_A64FX)
   )
 ENDIF()
 
+IF (KOKKOS_ARCH_ARMV9_GRACE)
+  SET(KOKKOS_ARCH_ARM_NEON ON)
+  COMPILER_SPECIFIC_FLAGS(
+    COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
+    DEFAULT -mcpu=neoverse-v2 -msve-vector-bits=128
+  )
+ENDIF()
+
 IF (KOKKOS_ARCH_ZEN)
   COMPILER_SPECIFIC_FLAGS(
     COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID