From bd88ff1039fbf45f37745a97e494dab30a3fa2e9 Mon Sep 17 00:00:00 2001
From: Mikael Simberg
Date: Wed, 2 Jul 2025 16:10:05 +0200
Subject: [PATCH 1/2] Add HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES=0 to MPS wrapper
 script to avoid GPU numa nodes

---
 docs/running/slurm.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/running/slurm.md b/docs/running/slurm.md
index 3d494bf6..cc203eb4 100644
--- a/docs/running/slurm.md
+++ b/docs/running/slurm.md
@@ -301,7 +301,7 @@ if [[ $SLURM_LOCALID -eq 0 ]]; then
 fi
 
 # Set CUDA device
-numa_nodes=$(hwloc-calc --physical --intersect NUMAnode $(hwloc-bind --get --taskset))
+numa_nodes=$(HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES=0 hwloc-calc --physical --intersect NUMAnode $(hwloc-bind --get --taskset))
 export CUDA_VISIBLE_DEVICES=$numa_nodes
 
 # Wait for MPS to start

From 2e406a2416ccd22d5f06cca3a22c60597696dc94 Mon Sep 17 00:00:00 2001
From: Mikael Simberg
Date: Thu, 3 Jul 2025 11:56:05 +0200
Subject: [PATCH 2/2] Add comment about HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES

---
 docs/running/slurm.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/running/slurm.md b/docs/running/slurm.md
index cc203eb4..0593b703 100644
--- a/docs/running/slurm.md
+++ b/docs/running/slurm.md
@@ -300,7 +300,9 @@ if [[ $SLURM_LOCALID -eq 0 ]]; then
     CUDA_VISIBLE_DEVICES=0,1,2,3 nvidia-cuda-mps-control -d
 fi
 
-# Set CUDA device
+# Set CUDA device. Disable HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES to avoid GPU NUMA
+# nodes appearing in the list of CUDA devices. They start appearing in hwloc
+# version 2.11.
 numa_nodes=$(HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES=0 hwloc-calc --physical --intersect NUMAnode $(hwloc-bind --get --taskset))
 export CUDA_VISIBLE_DEVICES=$numa_nodes
 
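
For reference, the CUDA device selection block of the MPS wrapper script in
docs/running/slurm.md reads roughly as follows once both patches are applied.
This is a sketch assembled from the hunk context above; indentation and any
lines outside the shown context (for example the comment above the if block)
are assumptions, not part of the patch.

    # Launch MPS from one rank per node (assumed comment, not shown in the patch)
    if [[ $SLURM_LOCALID -eq 0 ]]; then
        CUDA_VISIBLE_DEVICES=0,1,2,3 nvidia-cuda-mps-control -d
    fi

    # Set CUDA device. Disable HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES to avoid GPU NUMA
    # nodes appearing in the list of CUDA devices. They start appearing in hwloc
    # version 2.11.
    numa_nodes=$(HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES=0 hwloc-calc --physical --intersect NUMAnode $(hwloc-bind --get --taskset))
    export CUDA_VISIBLE_DEVICES=$numa_nodes

    # Wait for MPS to start

Note that the HWLOC_KEEP_NVIDIA_GPU_NUMA_NODES=0 assignment applies only to
that single hwloc-calc invocation; it is not exported, so it does not change
hwloc behaviour elsewhere in the job step.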