easybuilders · branfosj · Jan 19, 2023 · Feb 10, 2023 · Feb 10, 2023 · Feb 10, 2023
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a-CUDA-11.7.0.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1-foss-2022a-CUDA-11.7.0.eb
@@ -0,0 +1,137 @@
+name = 'PyTorch'
+version = '1.13.1'
+versionsuffix = '-CUDA-%(cudaver)s'
+
+homepage = 'https://pytorch.org/'
+description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
+PyTorch is a deep learning framework that puts Python first."""
+
+toolchain = {'name': 'foss', 'version': '2022a'}
+
+source_urls = [GITHUB_RELEASE]
+sources = ['%(namelower)s-v%(version)s.tar.gz']
+patches = [
+    'PyTorch-1.7.0_disable-dev-shm-test.patch',
+    'PyTorch-1.10.0_fix-kineto-crash.patch',
+    'PyTorch-1.11.0_fix-fsdp-fp16-test.patch',
+    'PyTorch-1.11.1_skip-test_init_from_local_shards.patch',
+    'PyTorch-1.12.1_add-hypothesis-suppression.patch',
+    'PyTorch-1.12.1_fix-skip-decorators.patch',
+    'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
+    'PyTorch-1.12.1_fix-test_wishart_log_prob.patch',
+    'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
+    'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch',
+    'PyTorch-1.12.1_fix-vsx-vector-funcs.patch',
+    'PyTorch-1.12.1_fix-vsx-loadu.patch',
+    'PyTorch-1.12.1_skip-test_round_robin.patch',
+    'PyTorch-1.13.1_fix-pytest-args.patch',
+    'PyTorch-1.13.1_fix-test-ops-conf.patch',
+    'PyTorch-1.13.1_no-cuda-stubs-rpath.patch',
+    'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch',
+    'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch',
+    'PyTorch-1.13.1_increase-tolerance-test_ops.patch',
+    'PyTorch-1.13.1_install-vsx-vec-headers.patch',
+    'PyTorch-1.13.1_skip-failing-grad-test.patch',
+    'PyTorch-1.13.1_skip-test_freeze_conv_relu_fusion.patch',
+]
+checksums = [
+    {'pytorch-v1.13.1.tar.gz': 'dbc229ee9750b02b514937d017744443a269ea0241ed3f32b9af0703589d25d4'},
+    {'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
+    {'PyTorch-1.10.0_fix-kineto-crash.patch': 'dc467333b28162149af8f675929d8c6bf219f23230bfc0d39af02ba4f6f882eb'},
+    {'PyTorch-1.11.0_fix-fsdp-fp16-test.patch': 'bb1c4e6d6fd4b0cf57ff8b824c797331b533bb1ffc63f5db0bae3aee10c3dc13'},
+    {'PyTorch-1.11.1_skip-test_init_from_local_shards.patch':
+     '4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'},
+    {'PyTorch-1.12.1_add-hypothesis-suppression.patch':
+     'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'},
+    {'PyTorch-1.12.1_fix-skip-decorators.patch': 'e3ca6e42b2fa592ea095939fb59ab875668a058479407db3f3684cc5c6f4146c'},
+    {'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch':
+     '1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'},
+    {'PyTorch-1.12.1_fix-test_wishart_log_prob.patch':
+     'cf475ae6e6234b96c8d1bf917597c5176c94b3ccd940b72f2e1cd0c979580f45'},
+    {'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
+    {'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch':
+     '0bd7e88b92c4c6f0fecf01746009858ba19f2df68b10b88c41485328a531875d'},
+    {'PyTorch-1.12.1_fix-vsx-vector-funcs.patch': 'caccbf60f62eac313896c1eaec78b08f5d0fdfcb907079087490bb13d1561aa2'},
+    {'PyTorch-1.12.1_fix-vsx-loadu.patch': '8bfe3c94ada1dd1f7974a1261a8b576fb7ae944050fa1c7830fca033831123b2'},
+    {'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
+    {'PyTorch-1.13.1_fix-pytest-args.patch': 'd3e3c841cf8d73683750f29326f2be56ee0bb5df7ff522baf7d7c3f301a91ec2'},
+    {'PyTorch-1.13.1_fix-test-ops-conf.patch': 'df652eec7753864ebebbfeca546929a53e3fb8f24259d5c9b964266a8551198c'},
+    {'PyTorch-1.13.1_no-cuda-stubs-rpath.patch': '4c636059850fc9d1ecb27ce275f8aad5d5b6fdc19e35aff0c25b86cb3201352a'},
+    {'PyTorch-1.13.1_remove-flaky-test-in-testnn.patch':
+     'be83ff61fe2dedab6d49c232936d5622df81ab49154264490021c6c828e53315'},
+    {'PyTorch-1.13.1_skip-ao-sparsity-test-without-fbgemm.patch':
+     '92cd48ef6d01aa7e07ccce1dcaf40bc3fb0f220c4aa4fea15f3e05fb42e37909'},
+    {'PyTorch-1.13.1_increase-tolerance-test_ops.patch':
+     'd53e98bf0da7788b68042dcc31bc5708dae962fde3f110cc827eb807a5d08e49'},
+    {'PyTorch-1.13.1_install-vsx-vec-headers.patch':
+     '7b678f54bb947afd4767f5877ac424b4b94ce5db609ea20f5a869ccf4027035f'},
+    {'PyTorch-1.13.1_skip-failing-grad-test.patch': '6681200f9509893cb9231b5c93ac9bc5e6d9d9ae4febefca52e7cbc843ba8f51'},
+    {'PyTorch-1.13.1_skip-test_freeze_conv_relu_fusion.patch':
+     'a0fda8dd30cda265e958830495618b4214c09d40e31c9172c164eff1385adbbc'},
+]
+
+osdependencies = [OS_PKG_IBVERBS_DEV]
+
+builddependencies = [
+    ('CMake', '3.23.1'),
+    ('hypothesis', '6.46.7'),
+    # For tests
+    ('pytest-rerunfailures', '11.1'),
+    ('pytest-shard', '0.1.2'),
+]
+
+dependencies = [
+    ('CUDA', '11.7.0', '', SYSTEM),
+    ('Ninja', '1.10.2'),  # Required for JIT compilation of C++ extensions
+    ('Python', '3.10.4'),
+    ('protobuf', '3.19.4'),
+    ('protobuf-python', '3.19.4'),
+    ('pybind11', '2.9.2'),
+    ('SciPy-bundle', '2022.05'),
+    ('PyYAML', '6.0'),
+    ('MPFR', '4.1.0'),
+    ('GMP', '6.2.1'),
+    ('numactl', '2.0.14'),
+    ('FFmpeg', '4.4.2'),
+    ('Pillow', '9.1.1'),
+    ('cuDNN', '8.4.1.50', '-CUDA-%(cudaver)s', SYSTEM),
+    ('magma', '2.6.2', '-CUDA-%(cudaver)s'),
+    ('NCCL', '2.12.12', '-CUDA-%(cudaver)s'),
+    ('expecttest', '0.1.3'),
+]
+
+# default CUDA compute capabilities to use (override via --cuda-compute-capabilities)
+cuda_compute_capabilities = ['3.5', '3.7', '5.2', '6.0', '6.1', '7.0', '7.2', '7.5', '8.0', '8.6']
+
+excluded_tests = {
+    '': [
+        # This test seems to take too long on NVIDIA Ampere at least.
+        'distributed/test_distributed_spawn',
+        # Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375
+        'distributions/test_constraints',
+        # no xdoctest
+        'doctests',
+        # failing on broadwell
+        # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
+        'test_native_mha',
+        # intermittent failures on various systems
+        # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
+        'distributed/rpc/test_tensorpipe_agent',
+        # Fails on A10s: https://github.com/pytorch/pytorch/issues/63079
+        # Fails intermittently: https://github.com/pytorch/pytorch/issues/98414
+        'test_optim',
+    ]
+}
+
+runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error  --verbose %(excluded_tests)s'
+
+# The readelf sanity check command can be taken out once the TestRPATH test from
+# https://github.com/pytorch/pytorch/pull/87593 is accepted, since it is then checked as part of the PyTorch test suite
+local_libcaffe2 = "$EBROOTPYTORCH/lib/python%%(pyshortver)s/site-packages/torch/lib/libcaffe2_nvrtc.%s" % SHLIB_EXT
+sanity_check_commands = [
+    "readelf -d %s | egrep 'RPATH|RUNPATH' | grep -v stubs" % local_libcaffe2,
+]
+
+tests = ['PyTorch-check-cpp-extension.py']
+
+moduleclass = 'ai'
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-test_freeze_conv_relu_fusion.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.13.1_skip-test_freeze_conv_relu_fusion.patch
@@ -0,0 +1,17 @@
+Skip test_freeze_conv_relu_fusion
+Mismatched elements: 10 / 30 (33.3%)
+Greatest absolute difference: 3.057718276977539e-05 at index (2, 3, 0, 0, 0) (up to 1e-05 allowed)
+Greatest relative difference: 8.758584417742737e-05 at index (0, 3, 0, 0, 0) (up to 1.3e-06 allowed)
+
+Patch by Simon Branford (University of Birmingham)
+--- test/jit/test_freezing.py.orig	2023-07-06 09:29:49.457408000 +0100
++++ test/jit/test_freezing.py	2023-07-06 09:30:33.271766773 +0100
+@@ -2208,7 +2208,7 @@
+             inp = torch.rand([4, 3, 4, 4])
+             self.assertEqual(frozen(inp), mod(inp))
+
+-    @unittest.skipIf(not (TEST_CUDNN or TEST_WITH_ROCM), "requires CUDNN")
++    @unittest.skipIf(True, "numerical error")
+     def test_freeze_conv_relu_fusion(self):
+         with set_default_dtype(torch.float):
+             conv_bias = [True, False]