{lib}[GCCcore/12.2.0,foss/2022b] PyTorch v1.13.1, cuDNN v8.5.0.96, magma v2.7.1, ... w/ CUDA 11.7.0 #18853

Merged
40 changes: 40 additions & 0 deletions easybuild/easyconfigs/c/cuDNN/cuDNN-8.5.0.96-CUDA-11.7.0.eb
@@ -0,0 +1,40 @@
name = 'cuDNN'
version = '8.5.0.96'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://developer.nvidia.com/cudnn'
description = """The NVIDIA CUDA Deep Neural Network library (cuDNN) is
a GPU-accelerated library of primitives for deep neural networks."""

toolchain = SYSTEM

# note: cuDNN is tied to specific CUDA versions,
# see also https://docs.nvidia.com/deeplearning/cudnn/support-matrix/index.html#cudnn-cuda-hardware-versions
local_short_ver = '.'.join(version.split('.')[:3])
source_urls = ['https://developer.download.nvidia.com/compute/redist/cudnn/'
'v%s/local_installers/%%(cudashortver)s/' % local_short_ver]
sources = ['%(namelower)s-linux-%(cudnnarch)s-%(version)s_cuda%(cudamajver)s-archive.tar.xz']
checksums = [
{
'%(namelower)s-linux-x86_64-%(version)s_cuda%(cudamajver)s-archive.tar.xz':
'5454a6fd94f008728caae9adad993c4e85ef36302e26bce43bea7d458a5e7b6d',
'%(namelower)s-linux-ppc64le-%(version)s_cuda%(cudamajver)s-archive.tar.xz':
'00373c3d5e0b536a5557d0d0eb50706777f213a222b4030e1b71b1bec43d205f',
'%(namelower)s-linux-sbsa-%(version)s_cuda%(cudamajver)s-archive.tar.xz':
'86780abbecd4634e7363fad1d000ae23b7905a5f8383bddbf7332c6934791dde',
}
]

dependencies = [('CUDA', '11.7.0')]

sanity_check_paths = {
'files': [
'include/cudnn.h', 'lib64/libcudnn_adv_infer_static.a', 'lib64/libcudnn_adv_train_static.a',
'lib64/libcudnn_cnn_infer_static.a', 'lib64/libcudnn_cnn_train_static.a',
'lib64/libcudnn_ops_infer_static.a', 'lib64/libcudnn_ops_train_static.a',
'lib64/libcudnn.%s' % SHLIB_EXT
],
'dirs': ['include', 'lib64'],
}

moduleclass = 'numlib'
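
For reference, a minimal Python sketch of how the filename template above resolves on an x86_64 host (the template values are assumptions based on this easyconfig; EasyBuild performs the substitution internally):

# illustrative template values for a CUDA 11.7.0 build on an x86_64 host
template_values = {
    'namelower': 'cudnn',
    'cudnnarch': 'x86_64',  # EasyBuild derives this from the host architecture
    'version': '8.5.0.96',
    'cudamajver': '11',
}
filename = ('%(namelower)s-linux-%(cudnnarch)s-%(version)s_cuda%(cudamajver)s-archive.tar.xz'
            % template_values)
print(filename)  # cudnn-linux-x86_64-8.5.0.96_cuda11-archive.tar.xz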
40 changes: 40 additions & 0 deletions easybuild/easyconfigs/m/magma/magma-2.7.1-foss-2022b-CUDA-11.7.0.eb
@@ -0,0 +1,40 @@
easyblock = 'CMakeMake'

name = 'magma'
version = '2.7.1'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://icl.cs.utk.edu/magma/'
description = """The MAGMA project aims to develop a dense linear algebra library similar to
LAPACK but for heterogeneous/hybrid architectures, starting with current Multicore+GPU systems."""

toolchain = {'name': 'foss', 'version': '2022b'}
toolchainopts = {'pic': True, 'openmp': True}

source_urls = ['https://icl.cs.utk.edu/projectsfiles/%(name)s/downloads/']
sources = [SOURCE_TAR_GZ]
checksums = ['d9c8711c047a38cae16efde74bee2eb3333217fd2711e1e9b8606cbbb4ae1a50']

builddependencies = [
('CMake', '3.24.3'),
]
dependencies = [
('CUDA', '11.7.0', '', SYSTEM),
('UCX-CUDA', '1.13.1', versionsuffix),
]

# default CUDA compute capabilities to use (override via --cuda-compute-capabilities)
cuda_compute_capabilities = ['3.5', '5.0', '6.0', '7.0', '7.5', '8.0', '8.6']

# make sure both static and shared libs are built
local_common_opts = '-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DGPU_TARGET="%%(cuda_sm_space_sep)s"'
configopts = [
(local_common_opts + ' -DBUILD_SHARED_LIBS=%s ') % local_shared for local_shared in ('ON', 'OFF')
]

sanity_check_paths = {
'files': ['lib/libmagma.%s' % SHLIB_EXT, 'lib/libmagma.a'],
'dirs': ['include'],
}

moduleclass = 'math'
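
For clarity, a minimal sketch of what the configopts comprehension above yields: the doubled `%%` collapses to a single `%` during formatting, leaving the `%(cuda_sm_space_sep)s` template for EasyBuild to substitute later (with e.g. `sm_70 sm_80`, derived from the compute capabilities; the expansion shown is an assumption):

local_common_opts = '-DCMAKE_CUDA_FLAGS=-allow-unsupported-compiler -DGPU_TARGET="%%(cuda_sm_space_sep)s"'
configopts = [(local_common_opts + ' -DBUILD_SHARED_LIBS=%s ') % shared for shared in ('ON', 'OFF')]
print(configopts[0])  # ... -DGPU_TARGET="%(cuda_sm_space_sep)s" -DBUILD_SHARED_LIBS=ON
print(configopts[1])  # ... -DGPU_TARGET="%(cuda_sm_space_sep)s" -DBUILD_SHARED_LIBS=OFF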
33 changes: 33 additions & 0 deletions easybuild/easyconfigs/n/NCCL/NCCL-2.16.2-GCCcore-12.2.0-CUDA-11.7.0.eb
@@ -0,0 +1,33 @@
name = 'NCCL'
version = '2.16.2'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://developer.nvidia.com/nccl'
description = """The NVIDIA Collective Communications Library (NCCL) implements multi-GPU and multi-node collective
communication primitives that are performance optimized for NVIDIA GPUs."""

toolchain = {'name': 'GCCcore', 'version': '12.2.0'}

github_account = 'NVIDIA'
source_urls = [GITHUB_SOURCE]
sources = ['v%(version)s-1.tar.gz']
patches = ['NCCL-2.16.2_fix-cpuid.patch']
checksums = [
{'v2.16.2-1.tar.gz': '7f7c738511a8876403fc574d13d48e7c250d934d755598d82e14bab12236fc64'},
{'NCCL-2.16.2_fix-cpuid.patch': '0459ecadcd32b2a7a000a2ce4f675afba908b2c0afabafde585330ff4f83e277'},
]

builddependencies = [('binutils', '2.39')]

dependencies = [
('CUDA', '11.7.0', '', SYSTEM),
('UCX-CUDA', '1.13.1', versionsuffix),
]

prebuildopts = "sed -i 's/NVCUFLAGS := /NVCUFLAGS := -allow-unsupported-compiler /' makefiles/common.mk && "
buildopts = "VERBOSE=1"

# default CUDA compute capabilities to use (override via --cuda-compute-capabilities)
cuda_compute_capabilities = ['3.5', '5.0', '6.0', '7.0', '7.5', '8.0', '8.6']

moduleclass = 'lib'
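
The sed call in prebuildopts above injects `-allow-unsupported-compiler` into NCCL's nvcc flags so that nvcc accepts GCC 12. A roughly equivalent edit sketched in Python (the actual build uses sed on NCCL's makefiles/common.mk):

# roughly equivalent to: sed -i 's/NVCUFLAGS := /NVCUFLAGS := -allow-unsupported-compiler /' makefiles/common.mk
path = 'makefiles/common.mk'
with open(path) as f:
    text = f.read()
text = text.replace('NVCUFLAGS := ', 'NVCUFLAGS := -allow-unsupported-compiler ')
with open(path, 'w') as f:
    f.write(text)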
197 changes: 197 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022b-CUDA-11.7.0.eb
@@ -0,0 +1,197 @@
name = 'PyTorch'
version = '1.13.1'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://pytorch.org/'
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
PyTorch is a deep learning framework that puts Python first."""

toolchain = {'name': 'foss', 'version': '2022b'}

source_urls = [GITHUB_RELEASE]
sources = ['%(namelower)s-v%(version)s.tar.gz']
patches = [
'PyTorch-1.7.0_disable-dev-shm-test.patch',
'PyTorch-1.10.0_fix-kineto-crash.patch',
'PyTorch-1.11.0_fix-fp16-quantization-without-fbgemm.patch',
'PyTorch-1.11.1_skip-test_init_from_local_shards.patch',
'PyTorch-1.12.0_fix-EmbeddingBag-without-fbgemm.patch',
'PyTorch-1.12.1_add-hypothesis-suppression.patch',
'PyTorch-1.12.1_fix-skip-decorators.patch',
'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
'PyTorch-1.12.1_fix-test_wishart_log_prob.patch',
'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch',
'PyTorch-1.12.1_fix-vsx-loadu.patch',
'PyTorch-1.12.1_fix-vsx-vector-funcs.patch',
'PyTorch-1.12.1_skip-test_round_robin.patch',
'PyTorch-1.13.1_allow-GCC-12-for-CUDA-11.7.patch',
'PyTorch-1.13.1_disable-test-sharding.patch',
'PyTorch-1.13.1_fix-duplicate-kDefaultTimeout-definition.patch',
'PyTorch-1.13.1_fix-flaky-jit-test.patch',
'PyTorch-1.13.1_fix-fsdp-fp16-test.patch',
'PyTorch-1.13.1_fix-fsdp-tp-integration-test.patch',
'PyTorch-1.13.1_fix-gcc-12-missing-includes.patch',
'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch',
'PyTorch-1.13.1_fix-kineto-crash-on-exit.patch',
'PyTorch-1.13.1_fix-numpy-deprecations.patch',
'PyTorch-1.13.1_fix-protobuf-dependency.patch',
'PyTorch-1.13.1_fix-pytest-args.patch',
'PyTorch-1.13.1_fix-python-3.11-compat.patch',
'PyTorch-1.13.1_fix-test-ops-conf.patch',
'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch',
'PyTorch-1.13.1_fix-wrong-check-in-fsdp-tests.patch',
'PyTorch-1.13.1_increase-tolerance-test_jit.patch',
'PyTorch-1.13.1_increase-tolerance-test_ops.patch',
'PyTorch-1.13.1_increase-tolerance-test_optim.patch',
'PyTorch-1.13.1_install-vsx-vec-headers.patch',
'PyTorch-1.13.1_no-cuda-stubs-rpath.patch',
'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch',
'PyTorch-1.13.1_skip-failing-grad-test.patch',
'PyTorch-1.13.1_skip-failing-singular-grad-test.patch',
'PyTorch-1.13.1_skip-test_find_unused_parameters-detail.patch',
'PyTorch-1.13.1_skip-test-requiring-online-access.patch',
'PyTorch-1.13.1_skip-tests-without-fbgemm.patch',
'PyTorch-1.13.1_workaround-gcc12-destructor-exception-bug.patch',
'PyTorch-2.0.1_avoid-test_quantization-failures.patch',
]
checksums = [
{'pytorch-v1.13.1.tar.gz': 'dbc229ee9750b02b514937d017744443a269ea0241ed3f32b9af0703589d25d4'},
{'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
{'PyTorch-1.10.0_fix-kineto-crash.patch': 'dc467333b28162149af8f675929d8c6bf219f23230bfc0d39af02ba4f6f882eb'},
{'PyTorch-1.11.0_fix-fp16-quantization-without-fbgemm.patch':
'cc526130b6446bbbf5f0f7372d3aeee3e7d4c4d6e471524dff028b430b152934'},
{'PyTorch-1.11.1_skip-test_init_from_local_shards.patch':
'4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'},
{'PyTorch-1.12.0_fix-EmbeddingBag-without-fbgemm.patch':
'090598592283e3fc46ee08a68b6a6afe07be41b26514afba51834408bf1c98ed'},
{'PyTorch-1.12.1_add-hypothesis-suppression.patch':
'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'},
{'PyTorch-1.12.1_fix-skip-decorators.patch': 'e3ca6e42b2fa592ea095939fb59ab875668a058479407db3f3684cc5c6f4146c'},
{'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch':
'1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'},
{'PyTorch-1.12.1_fix-test_wishart_log_prob.patch':
'cf475ae6e6234b96c8d1bf917597c5176c94b3ccd940b72f2e1cd0c979580f45'},
{'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
{'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch':
'0bd7e88b92c4c6f0fecf01746009858ba19f2df68b10b88c41485328a531875d'},
{'PyTorch-1.12.1_fix-vsx-loadu.patch': '8bfe3c94ada1dd1f7974a1261a8b576fb7ae944050fa1c7830fca033831123b2'},
{'PyTorch-1.12.1_fix-vsx-vector-funcs.patch': 'caccbf60f62eac313896c1eaec78b08f5d0fdfcb907079087490bb13d1561aa2'},
{'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
{'PyTorch-1.13.1_allow-GCC-12-for-CUDA-11.7.patch':
'4c9a4247dcf6e0f62fda2e7283f7de6f7c801d5e61c39d27a91a287f9d363d68'},
{'PyTorch-1.13.1_disable-test-sharding.patch': 'df2074adeba47998ce2993d99ca64eb6f1c79ab7057f553b436efdec264d3572'},
{'PyTorch-1.13.1_fix-duplicate-kDefaultTimeout-definition.patch':
'882f8cfaf33490a4372928fb6673cbbfa40e5be1b64bf7e0cc2924d73cf872e8'},
{'PyTorch-1.13.1_fix-flaky-jit-test.patch': '71efdeb29b5e5b4982c9f5cb2182733654a34d52f85bb5487bc4d7d99b86101b'},
{'PyTorch-1.13.1_fix-fsdp-fp16-test.patch': '8ae68e60d6e1f92f50322b7f0381c7e65251fba32d7606e3a238a36a2f55b5cf'},
{'PyTorch-1.13.1_fix-fsdp-tp-integration-test.patch':
'31e2d63b54ae1a8c554575f46db79bf8bbda851b6ca0ffe623c4911207a3c2bc'},
{'PyTorch-1.13.1_fix-gcc-12-missing-includes.patch':
'18df8c61ecaa9fb659346c1e172828bca6b069f0145bb8f6a36b0a23b7bef0a6'},
{'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch':
'5c7be91a6096083a0b1315efe0001537499c600f1f569953c6a2c7f4cc1d0910'},
{'PyTorch-1.13.1_fix-kineto-crash-on-exit.patch':
'f1e6808ee8d91a2ad76e0caedb4685e5aec3008d5e2e3c3c3e88cbb25cbd71b4'},
{'PyTorch-1.13.1_fix-numpy-deprecations.patch': 'f461b570efe0434ddd806bf2fa7020eb213e3ed89d0eb4403e076f4276ba2a46'},
{'PyTorch-1.13.1_fix-protobuf-dependency.patch':
'8bd755a0cab7233a243bc65ca57c9630dfccdc9bf8c9792f0de4e07a644fcb00'},
{'PyTorch-1.13.1_fix-pytest-args.patch': 'd3e3c841cf8d73683750f29326f2be56ee0bb5df7ff522baf7d7c3f301a91ec2'},
{'PyTorch-1.13.1_fix-python-3.11-compat.patch': 'fa4eb0e27e00a90bb217b77c0023089c4659c03f37d781ab4a681bdcb4f0432f'},
{'PyTorch-1.13.1_fix-test-ops-conf.patch': 'df652eec7753864ebebbfeca546929a53e3fb8f24259d5c9b964266a8551198c'},
{'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch':
'bdde0f2105215c95a54de64ec4b1a4520528510663174fef6d5b900eb1db3937'},
{'PyTorch-1.13.1_fix-wrong-check-in-fsdp-tests.patch':
'cbb5ca9ad668a504a456a2cc02d7254b79ddfd9a971a1648f0508fb103a9fc89'},
{'PyTorch-1.13.1_increase-tolerance-test_jit.patch':
'b97913754a0ae0887b8137db0b0d57caff8c3d7bd96fe555ea27ea01ff14527a'},
{'PyTorch-1.13.1_increase-tolerance-test_ops.patch':
'c909fdfc2b12df457e1eb5514265ffec3eab653994949416f3f048668421e223'},
{'PyTorch-1.13.1_increase-tolerance-test_optim.patch':
'a079d824085eab89794f5ecfc67792f735ed8cfd3fe7db52e4dea62e583cfe06'},
{'PyTorch-1.13.1_install-vsx-vec-headers.patch':
'7b678f54bb947afd4767f5877ac424b4b94ce5db609ea20f5a869ccf4027035f'},
{'PyTorch-1.13.1_no-cuda-stubs-rpath.patch': '4c636059850fc9d1ecb27ce275f8aad5d5b6fdc19e35aff0c25b86cb3201352a'},
{'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch':
'be83ff61fe2dedab6d49c232936d5622df81ab49154264490021c6c828e53315'},
{'PyTorch-1.13.1_skip-failing-grad-test.patch': '6681200f9509893cb9231b5c93ac9bc5e6d9d9ae4febefca52e7cbc843ba8f51'},
{'PyTorch-1.13.1_skip-failing-singular-grad-test.patch':
'72688a57b2bb617665ad1a1d5e362c5111ae912c10936bb38a089c0204729f48'},
{'PyTorch-1.13.1_skip-test_find_unused_parameters-detail.patch':
'c71a3385ce5fc447f908a3df78ade2143d97e2538cf03b530db4f6cc8b32c22b'},
{'PyTorch-1.13.1_skip-test-requiring-online-access.patch':
'61c3b7859dc06a9969981b07aa2789630de110d6d1d3633d27364be47af74712'},
{'PyTorch-1.13.1_skip-tests-without-fbgemm.patch':
'481e595f673baf8ae58b41697a6792b83048b0264aa79b422f48cd8c22948bb7'},
{'PyTorch-1.13.1_workaround-gcc12-destructor-exception-bug.patch':
'a09a2d7ebd428c65988729578bb3fa372565ba176ab9ed7abf11f6fcb15e903e'},
{'PyTorch-2.0.1_avoid-test_quantization-failures.patch':
'02e3f47e4ed1d7d6077e26f1ae50073dc2b20426269930b505f4aefe5d2f33cd'},
]

osdependencies = [OS_PKG_IBVERBS_DEV]

builddependencies = [
('CMake', '3.24.3'),
('hypothesis', '6.68.2'),
# For tests
('pytest-rerunfailures', '12.0'),
('pytest-shard', '0.1.2'),
]

dependencies = [
('CUDA', '11.7.0', '', SYSTEM),
('Ninja', '1.11.1'), # Required for JIT compilation of C++ extensions
('Python', '3.10.8'),
('protobuf', '23.0'),
('protobuf-python', '4.23.0'),
('pybind11', '2.10.3'),
('SciPy-bundle', '2023.02'),
('PyYAML', '6.0'),
('MPFR', '4.2.0'),
('GMP', '6.2.1'),
('numactl', '2.0.16'),
('FFmpeg', '5.1.2'),
('Pillow', '9.4.0'),
('cuDNN', '8.5.0.96', '-CUDA-%(cudaver)s', SYSTEM),
('magma', '2.7.1', '-CUDA-%(cudaver)s'),
('NCCL', '2.16.2', '-CUDA-%(cudaver)s'),
('expecttest', '0.1.3'),
]

# nvcc refuses GCC 12 with CUDA 11.7 by default; pass -allow-unsupported-compiler via CMake
custom_opts = ['CMAKE_CUDA_FLAGS=-allow-unsupported-compiler']

excluded_tests = {
'': [
# This test seems to take too long, at least on NVIDIA Ampere GPUs.
'distributed/test_distributed_spawn',
# Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375
'distributions/test_constraints',
# no xdoctest
'doctests',
# failing on broadwell
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
'test_native_mha',
# intermittent failures on various systems
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
'distributed/rpc/test_tensorpipe_agent',
]
}

runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s'
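
# Note: a sketch of how the templates above are expected to expand (an assumption
# about the PyTorch easyblock, which substitutes %(python)s with the Python command
# and turns excluded_tests into exclusion arguments for run_test.py), e.g.:
#   cd test && PYTHONUNBUFFERED=1 python run_test.py --continue-through-error --verbose \
#       --exclude distributed/test_distributed_spawn distributions/test_constraints ...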

# The readelf sanity check command can be taken out once the TestRPATH test from
# https://github.com/pytorch/pytorch/pull/87593 is accepted, since it is then checked as part of the PyTorch test suite
local_libcaffe2 = "$EBROOTPYTORCH/lib/python%%(pyshortver)s/site-packages/torch/lib/libcaffe2_nvrtc.%s" % SHLIB_EXT
sanity_check_commands = [
"readelf -d %s | egrep 'RPATH|RUNPATH' | grep -v stubs" % local_libcaffe2,
]
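
# Note: the pipeline's exit status is that of the final grep, so this sanity check
# fails if libcaffe2_nvrtc carries no RPATH/RUNPATH entry at all, or only entries
# that point into CUDA's stubs directory (which the no-cuda-stubs-rpath patch prevents).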

# Especially test_quantization has a few corner cases that are triggered by the random input values,
# those cannot be easily avoided, see https://github.com/pytorch/pytorch/issues/107030
# So allow a low number of tests to fail as the tests "usually" succeed
max_failed_tests = 2

tests = ['PyTorch-check-cpp-extension.py']

moduleclass = 'ai'
29 changes: 29 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_allow-GCC-12-for-CUDA-11.7.patch
@@ -0,0 +1,29 @@
The JIT compilation of CUDA extensions fails because GCC 12 isn't officially supported by CUDA 11.7.
We can make it compatible by passing `-allow-unsupported-compiler`,
but we also need to tell the PyTorch code about the raised maximum supported compiler version.

Author: Alexander Grund (TU Dresden)

diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py
index 9ab43e5ccdd..15da97619be 100644
--- a/torch/utils/cpp_extension.py
+++ b/torch/utils/cpp_extension.py
@@ -56,7 +56,7 @@ CUDA_GCC_VERSIONS = {
'11.4': ((6, 0, 0), (11, 5, 0)),
'11.5': ((6, 0, 0), (11, 5, 0)),
'11.6': ((6, 0, 0), (11, 5, 0)),
- '11.7': ((6, 0, 0), (11, 5, 0)),
+ '11.7': ((6, 0, 0), (12, 3, 0)),
}

CUDA_CLANG_VERSIONS = {
@@ -227,7 +227,8 @@ COMMON_NVCC_FLAGS = [
'-D__CUDA_NO_HALF_CONVERSIONS__',
'-D__CUDA_NO_BFLOAT16_CONVERSIONS__',
'-D__CUDA_NO_HALF2_OPERATORS__',
- '--expt-relaxed-constexpr'
+ '--expt-relaxed-constexpr',
+ '-allow-unsupported-compiler',
]

COMMON_HIP_FLAGS = [
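
For context, the code path this patch affects is the JIT build of CUDA extensions via torch.utils.cpp_extension. A hedged usage sketch (`my_kernels` and its source file are hypothetical, not part of this PR):

from torch.utils.cpp_extension import load

# JIT-compiles the extension with nvcc; with this patch applied, GCC 12 passes
# the compiler version gate and -allow-unsupported-compiler is added to the flags
ext = load(
    name='my_kernels',          # hypothetical extension name
    sources=['my_kernels.cu'],  # hypothetical CUDA source file
    verbose=True,               # print the compiler command lines
)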
48 changes: 48 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-test_find_unused_parameters-detail.patch
@@ -0,0 +1,48 @@
In distributed/test_c10d_nccl the tests
test_find_unused_parameters_kwarg_debug_detail and
test_find_unused_parameters_kwarg_grad_is_view_debug_detail
often fail on some systems, with the root error seemingly being
> terminate called after throwing an instance of 'c10::Error'
> what(): CUDA error: driver shutting down

Stacktrace:
frame #0: c10::Error::Error(<snip>) + 0x8d (0x2ae861eff2cd in <snip>/torch/lib/libc10.so)
frame #1: c10::detail::torchCheckFail(<snip>) + 0xd0 (0x2ae861ec64d1 in <snip>/torch/lib/libc10.so)
frame #2: c10::cuda::c10_cuda_check_implementation(<snip>) + 0x352 (0x2ae861e948c2 in <snip>/torch/lib/libc10_cuda.so)
frame #3: c10d::ProcessGroupNCCL::WorkNCCL::startedGPUExecutionInternal() const + 0x140 (0x2ae848587e80 in <snip>/torch/lib/libtorch_cuda.so)
frame #4: c10d::ProcessGroupNCCL::WorkNCCL::isStarted() + 0x58 (0x2ae84858a1b8 in <snip>/torch/lib/libtorch_cuda.so)
frame #5: c10d::ProcessGroupNCCL::workCleanupLoop() + 0x3c8 (0x2ae84858ee18 in <snip>/torch/lib/libtorch_cuda.so)

Just skip these tests to avoid failing the whole test suite.

Author: Alexander Grund (TU Dresden)

diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py
index 6a0858eebf8..7340a89db82 100644
--- a/test/distributed/test_c10d_nccl.py
+++ b/test/distributed/test_c10d_nccl.py
@@ -12,7 +12,7 @@ import time
from contextlib import contextmanager
from datetime import timedelta
from itertools import product
-from unittest import mock
+from unittest import mock, skip

import torch
import torch.distributed as c10d
@@ -1460,6 +1460,7 @@ class DistributedDataParallelTest(

# TODO: Combine the following tests once https://github.com/pytorch/pytorch/issues/55967
# is resolved.
+ @skip("Debug level DETAIL fails on some systems/CUDA versions")
@requires_nccl()
@skip_if_lt_x_gpu(2)
@with_dist_debug_levels(levels=["DETAIL"])
@@ -1478,6 +1479,7 @@ class DistributedDataParallelTest(
def test_find_unused_parameters_kwarg_debug_off(self):
self._test_find_unused_parameters_kwarg()

+ @skip("Debug level DETAIL fails on some systems/CUDA versions")
@requires_nccl()
@skip_if_lt_x_gpu(2)
@with_dist_debug_levels(levels=["DETAIL"])