Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

{ai}[foss/2022a] PyTorch v1.13.1 w/ Python 3.10.4 w/ CUDA 11.7.0 #17156

Closed
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# Package identity. The version suffix embeds the CUDA version through the
# %(cudaver)s template (presumably filled in from the CUDA dependency, 11.7.0 below).
name = 'PyTorch'
version = '1.13.1'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://pytorch.org/'
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
PyTorch is a deep learning framework that puts Python first."""

# Toolchain (name + version) this build is defined against.
toolchain = {'name': 'foss', 'version': '2022a'}

# Download location and file name of the source tarball. %(namelower)s and %(version)s are
# templates; the matching checksum entry below is keyed 'pytorch-v1.13.1.tar.gz'.
source_urls = [GITHUB_RELEASE]
sources = ['%(namelower)s-v%(version)s.tar.gz']
# Patch files used by this build. The `checksums` list carries one entry per patch,
# in this same order, after the entry for the source tarball.
patches = [
    'PyTorch-1.7.0_disable-dev-shm-test.patch',
    'PyTorch-1.10.0_fix-kineto-crash.patch',
    'PyTorch-1.11.0_fix-fsdp-fp16-test.patch',
    'PyTorch-1.11.1_skip-test_init_from_local_shards.patch',
    'PyTorch-1.12.1_add-hypothesis-suppression.patch',
    'PyTorch-1.12.1_fix-skip-decorators.patch',
    'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
    'PyTorch-1.12.1_fix-test_wishart_log_prob.patch',
    'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
    'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch',
    'PyTorch-1.12.1_fix-vsx-vector-funcs.patch',
    'PyTorch-1.12.1_fix-vsx-loadu.patch',
    'PyTorch-1.12.1_skip-test_round_robin.patch',
    'PyTorch-1.13.1_fix-pytest-args.patch',
    'PyTorch-1.13.1_fix-test-ops-conf.patch',
    'PyTorch-1.13.1_no-cuda-stubs-rpath.patch',
    'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch',
    'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch',
    'PyTorch-1.13.1_increase-tolerance-test_ops.patch',
    'PyTorch-1.13.1_install-vsx-vec-headers.patch',
    'PyTorch-1.13.1_skip-failing-grad-test.patch',
    'PyTorch-1.13.1_skip-test_freeze_conv_relu_fusion.patch',
]
# One single-entry {file name: checksum} dict per downloaded/applied file:
# the source tarball first, then the patches in the order they appear in `patches`.
checksums = [
    {'pytorch-v1.13.1.tar.gz': 'dbc229ee9750b02b514937d017744443a269ea0241ed3f32b9af0703589d25d4'},
    {'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
    {'PyTorch-1.10.0_fix-kineto-crash.patch': 'dc467333b28162149af8f675929d8c6bf219f23230bfc0d39af02ba4f6f882eb'},
    {'PyTorch-1.11.0_fix-fsdp-fp16-test.patch': 'bb1c4e6d6fd4b0cf57ff8b824c797331b533bb1ffc63f5db0bae3aee10c3dc13'},
    {'PyTorch-1.11.1_skip-test_init_from_local_shards.patch':
     '4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'},
    {'PyTorch-1.12.1_add-hypothesis-suppression.patch':
     'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'},
    {'PyTorch-1.12.1_fix-skip-decorators.patch': 'e3ca6e42b2fa592ea095939fb59ab875668a058479407db3f3684cc5c6f4146c'},
    {'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch':
     '1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'},
    {'PyTorch-1.12.1_fix-test_wishart_log_prob.patch':
     'cf475ae6e6234b96c8d1bf917597c5176c94b3ccd940b72f2e1cd0c979580f45'},
    {'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
    {'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch':
     '0bd7e88b92c4c6f0fecf01746009858ba19f2df68b10b88c41485328a531875d'},
    {'PyTorch-1.12.1_fix-vsx-vector-funcs.patch': 'caccbf60f62eac313896c1eaec78b08f5d0fdfcb907079087490bb13d1561aa2'},
    {'PyTorch-1.12.1_fix-vsx-loadu.patch': '8bfe3c94ada1dd1f7974a1261a8b576fb7ae944050fa1c7830fca033831123b2'},
    {'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
    {'PyTorch-1.13.1_fix-pytest-args.patch': 'd3e3c841cf8d73683750f29326f2be56ee0bb5df7ff522baf7d7c3f301a91ec2'},
    {'PyTorch-1.13.1_fix-test-ops-conf.patch': 'df652eec7753864ebebbfeca546929a53e3fb8f24259d5c9b964266a8551198c'},
    {'PyTorch-1.13.1_no-cuda-stubs-rpath.patch': '4c636059850fc9d1ecb27ce275f8aad5d5b6fdc19e35aff0c25b86cb3201352a'},
    {'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch':
     'be83ff61fe2dedab6d49c232936d5622df81ab49154264490021c6c828e53315'},
    {'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch':
     '92cd48ef6d01aa7e07ccce1dcaf40bc3fb0f220c4aa4fea15f3e05fb42e37909'},
    {'PyTorch-1.13.1_increase-tolerance-test_ops.patch':
     'd53e98bf0da7788b68042dcc31bc5708dae962fde3f110cc827eb807a5d08e49'},
    {'PyTorch-1.13.1_install-vsx-vec-headers.patch':
     '7b678f54bb947afd4767f5877ac424b4b94ce5db609ea20f5a869ccf4027035f'},
    {'PyTorch-1.13.1_skip-failing-grad-test.patch': '6681200f9509893cb9231b5c93ac9bc5e6d9d9ae4febefca52e7cbc843ba8f51'},
    {'PyTorch-1.13.1_skip-test_freeze_conv_relu_fusion.patch':
     'a0fda8dd30cda265e958830495618b4214c09d40e31c9172c164eff1385adbbc'},
]

# Requirement on an OS-provided package (going by the constant's name, the
# InfiniBand verbs development files - confirm against the framework constant).
osdependencies = [OS_PKG_IBVERBS_DEV]

# Needed only while building and testing.
builddependencies = [
    ('CMake', '3.23.1'),
    ('hypothesis', '6.46.7'),
    # For tests
    ('pytest-rerunfailures', '11.1'),
    ('pytest-shard', '0.1.2'),
]

# Each entry is (name, version[, versionsuffix[, toolchain]]); the CUDA-aware
# entries reuse the '-CUDA-%(cudaver)s' suffix, and CUDA/cuDNN are tagged SYSTEM.
dependencies = [
    ('CUDA', '11.7.0', '', SYSTEM),
    ('Ninja', '1.10.2'),  # Required for JIT compilation of C++ extensions
    ('Python', '3.10.4'),
    ('protobuf', '3.19.4'),
    ('protobuf-python', '3.19.4'),
    ('pybind11', '2.9.2'),
    ('SciPy-bundle', '2022.05'),
    ('PyYAML', '6.0'),
    ('MPFR', '4.1.0'),
    ('GMP', '6.2.1'),
    ('numactl', '2.0.14'),
    ('FFmpeg', '4.4.2'),
    ('Pillow', '9.1.1'),
    ('cuDNN', '8.4.1.50', '-CUDA-%(cudaver)s', SYSTEM),
    ('magma', '2.6.2', '-CUDA-%(cudaver)s'),
    ('NCCL', '2.12.12', '-CUDA-%(cudaver)s'),
    ('expecttest', '0.1.3'),
]

# CUDA compute capabilities targeted by default
# (can be overridden via --cuda-compute-capabilities)
cuda_compute_capabilities = [
    '3.5', '3.7', '5.2', '6.0', '6.1',
    '7.0', '7.2', '7.5', '8.0', '8.6',
]

# Test modules left out of the PyTorch test run, with the reason for each.
# NOTE(review): the '' key presumably means "regardless of architecture" - confirm
# against the PyTorch easyblock.
excluded_tests = {
    '': [
        # This test seems to take too long on NVIDIA Ampere at least.
        'distributed/test_distributed_spawn',
        # Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375
        'distributions/test_constraints',
        # no xdoctest
        'doctests',
        # failing on broadwell
        # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
        'test_native_mha',
        # intermittent failures on various systems
        # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
        'distributed/rpc/test_tensorpipe_agent',
        # Fails on A10s: https://github.com/pytorch/pytorch/issues/63079
        # Fails intermittently: https://github.com/pytorch/pytorch/issues/98414
        'test_optim',
    ],
}

# Command used to run the test suite: run_test.py is started from the 'test' directory with
# unbuffered output; %(python)s and %(excluded_tests)s are templates left to be filled in later.
runtest = (
    'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py '
    '--continue-through-error --verbose %(excluded_tests)s'
)

# The readelf sanity check command can be taken out once the TestRPATH test from
# https://github.com/pytorch/pytorch/pull/87593 is accepted, since it is then checked as part of the PyTorch test suite
#
# '%%(pyshortver)s' is escaped so that the '%' formatting with SHLIB_EXT leaves a literal
# '%(pyshortver)s' template in the path; only the library extension is substituted here.
local_libcaffe2 = "$EBROOTPYTORCH/lib/python%%(pyshortver)s/site-packages/torch/lib/libcaffe2_nvrtc.%s" % SHLIB_EXT
sanity_check_commands = [
    # Print the RPATH/RUNPATH entries of the library and filter out any line mentioning 'stubs';
    # the pipeline's exit status is that of the final grep, so it fails when no other entry remains.
    # 'grep -E' is used instead of 'egrep', which GNU grep marks as obsolescent (warns since 3.8).
    "readelf -d %s | grep -E 'RPATH|RUNPATH' | grep -v stubs" % local_libcaffe2,
]

# Additional test script associated with this easyconfig.
tests = ['PyTorch-check-cpp-extension.py']

moduleclass = 'ai'
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Skip test_freeze_conv_relu_fusion, which fails due to numerical error:
Mismatched elements: 10 / 30 (33.3%)
Greatest absolute difference: 3.057718276977539e-05 at index (2, 3, 0, 0, 0) (up to 1e-05 allowed)
Greatest relative difference: 8.758584417742737e-05 at index (0, 3, 0, 0, 0) (up to 1.3e-06 allowed)

Patch by Simon Branford (University of Birmingham)
--- test/jit/test_freezing.py.orig 2023-07-06 09:29:49.457408000 +0100
+++ test/jit/test_freezing.py 2023-07-06 09:30:33.271766773 +0100
@@ -2208,7 +2208,7 @@
inp = torch.rand([4, 3, 4, 4])
self.assertEqual(frozen(inp), mod(inp))

- @unittest.skipIf(not (TEST_CUDNN or TEST_WITH_ROCM), "requires CUDNN")
+ @unittest.skipIf(True, "numerical error")
def test_freeze_conv_relu_fusion(self):
with set_default_dtype(torch.float):
conv_bias = [True, False]