Skip to content

Commit

Permalink
Update Frontier installation (#1208)
Browse files Browse the repository at this point in the history
* Fix Thrust build with ROCm 5.7.1
* Fix non-device-agnostic test name (`nocuda` -> `nodevice`)
* Update Frontier environment
* Load Miniforge for Python
* Ignore pr workflow for unrelated scripts
* Fix loaded data and cmake flags
* Use more cores
* Use conda path
* Unload darshan
  • Loading branch information
sethrj committed Apr 29, 2024
1 parent 772d57c commit 69cdb1a
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 19 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@ on:
paths-ignore:
- '**.rst'
- '**.md'
- 'scripts/dev'
- 'scripts'
- '.jenkins'
- '!scripts/cmake-presets/ci*'
- '!scripts/ci'
- '!scripts/build.sh'

concurrency:
group: pr-${{github.ref}}-${{github.event.number}}-${{github.workflow}}
Expand Down
5 changes: 3 additions & 2 deletions scripts/cmake-presets/frontier.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"CELERITAS_TEST_RESOURCE_LOCK": {"type": "BOOL", "value": "ON"},
"CMAKE_HIP_ARCHITECTURES": {"type": "STRING", "value": "gfx90a"},
"CMAKE_HIP_FLAGS": "-munsafe-fp-atomics",
"CMAKE_CXX_FLAGS": "-Wno-unused-command-line-argument -Wall -Wextra -pedantic -fcolor-diagnostics",
"CMAKE_EXE_LINKER_FLAGS": "-Wno-unused-command-line-argument",
"CMAKE_HIP_FLAGS_DEBUG": "-g -ggdb -O",
"CMAKE_CXX_FLAGS_RELEASE": "-O3 -DNDEBUG -march=znver3 -mtune=znver3",
Expand Down Expand Up @@ -80,7 +81,7 @@
{
"name": "base",
"configurePreset": "base",
"jobs": 8,
"jobs": 64,
"nativeToolOptions": ["-k0"]
},
{"name": "ndebug", "configurePreset": "ndebug", "inherits": "base"}
Expand All @@ -90,7 +91,7 @@
"name": "base",
"configurePreset": "base",
"output": {"outputOnFailure": true},
"execution": {"noTestsAction": "error", "stopOnFailure": false, "jobs": 8}
"execution": {"noTestsAction": "error", "stopOnFailure": false, "jobs": 32}
},
{"name": "ndebug", "configurePreset": "ndebug", "inherits": "base"}
]
Expand Down
26 changes: 15 additions & 11 deletions scripts/env/frontier.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
#!/bin/sh -e

_celer_base=$PROJWORK/csc404/celeritas-frontier

# From spack compiler toolchain
module load amd/5.6.0 PrgEnv-amd/8.3.3 craype-x86-trento libfabric/1.15.2.0 cray-pmi/6.1.8
PROJID=hep143
_celer_view=${PROJWORK}/${PROJID}/opt-view
_tool_view=/ccs/proj/${PROJID}/opt-view
_conda=/ccs/proj/${PROJID}/conda-frontier

module load PrgEnv-amd/8.5.0 cpe/23.12 amd/5.7.1 craype-x86-trento \
libfabric/1.15.2.0 miniforge3/23.11.0
# Disable warning "Using generic mem* routines instead of tuned routines"
export RFE_811452_DISABLE=1
export LD_LIBRARY_PATH=/opt/cray/pe/pmi/6.1.8/lib:$LD_LIBRARY_PATH:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.2.0/lib64
export LIBRARY_PATH=/opt/rocm-5.6.0/lib:/opt/rocm-5.6.0/lib64:$LIBRARY_PATH

# Avoid linking multiple different libsci (one with openmp, one without)
module unload cray-libsci
# Avoid libraries interfering with I/O
module unload darshan-runtime

# Set up compilers
test -n "${CRAYPE_DIR}"
Expand All @@ -23,13 +27,13 @@ export CC=${CRAYPE_DIR}/bin/cc
# module load craype-accel-amd-gfx90a

# Set up celeritas
export SPACK_ROOT=/ccs/proj/csc404/spack-frontier
export PATH=${_celer_base}/spack/view/bin:$PATH
export CMAKE_PREFIX_PATH=${_celer_base}/spack/view:${CMAKE_PREFIX_PATH}
export MODULEPATH=${SPACK_ROOT}/share/spack/lmod/cray-sles15-x86_64/Core:${MODULEPATH}
export SPACK_ROOT=/ccs/proj/hep143/spack
export PATH=${_celer_view}/bin:${_tool_view}/bin:${_conda}/bin:$PATH
export CMAKE_PREFIX_PATH=${_celer_view}:${CMAKE_PREFIX_PATH}
export MODULEPATH=${PROJWORK}/${PROJID}/share/lmod/linux-sles15-x86_64/Core:${MODULEPATH}

# Set up Geant4 data
module load geant4-data
module load geant4-data/11.0
test -n "${G4ENSDFSTATEDATA}"
test -e "${G4ENSDFSTATEDATA}"

Expand Down
2 changes: 2 additions & 0 deletions src/corecel/device_runtime_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@

#if CELERITAS_USE_CUDA
# include <cuda_runtime_api.h>
# define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_CUDA
#elif CELERITAS_USE_HIP
# include <hip/hip_runtime.h>
# define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_HIP
#endif

#if CELERITAS_USE_CUDA || CELERITAS_USE_HIP
Expand Down
10 changes: 5 additions & 5 deletions test/corecel/data/DeviceAllocation.test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ TEST(ConstructionTest, should_work_always)
EXPECT_TRUE(alloc.empty());
}

TEST(ConstructionTest, nocuda)
{
#if !CELER_USE_DEVICE
// Can't allocate
EXPECT_THROW(DeviceAllocation(1234), DebugError);
TEST(ConstructionTest, nodevice)
#else
GTEST_SKIP() << "CUDA is enabled";
TEST(ConstructionTest, DISABLED_nodevice)
#endif
{
// Can't allocate
EXPECT_THROW(DeviceAllocation(1234), DebugError);
}

TEST(DeviceAllocationTest, TEST_IF_CELER_DEVICE(device))
Expand Down

0 comments on commit 69cdb1a

Please sign in to comment.