Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

{devel}[foss/2020b] PyTorch v1.8.1 w/ Python 3.8.6 #12347

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -0,0 +1,109 @@
Based off https://github.com/pytorch/pytorch/pull/53736
Fixed to apply to PyTorch 1.8.0 by Simon Branford (University of Birmingham)
--- test/test_spectral_ops.py 2021-03-12 19:36:32.910758000 +0000
+++ test/test_spectral_ops.py 2021-03-12 19:41:42.314654931 +0000
@@ -185,6 +185,7 @@
with self.assertRaisesRegex(RuntimeError, match):
op(t)

+ @onlyOnCPUAndCUDA
def test_fft_invalid_dtypes(self, device):
t = torch.randn(64, device=device, dtype=torch.complex128)

@@ -599,7 +600,6 @@
_test_complex((40, 60, 3, 80), 3, lambda x: x.transpose(2, 0).select(0, 2)[5:55, :, 10:])
_test_complex((30, 55, 50, 22), 3, lambda x: x[:, 3:53, 15:40, 1:21])

- @skipCUDAIfRocm
@skipCPUIfNoMkl
@onlyOnCPUAndCUDA
@dtypes(torch.double)
@@ -680,8 +680,8 @@
self.assertEqual(torch.backends.cuda.cufft_plan_cache.max_size, 11) # default is cuda:1

# passes on ROCm w/ python 2.7, fails w/ python 3.6
- @skipCUDAIfRocm
@skipCPUIfNoMkl
+ @onlyOnCPUAndCUDA
@dtypes(torch.double)
def test_stft(self, device, dtype):
if not TEST_LIBROSA:
@@ -712,9 +712,8 @@
else:
window = None
if expected_error is None:
- with self.maybeWarnsRegex(UserWarning, "stft with return_complex=False"):
- result = x.stft(n_fft, hop_length, win_length, window,
- center=center, return_complex=False)
+ result = x.stft(n_fft, hop_length, win_length, window,
+ center=center, return_complex=False)
# NB: librosa defaults to np.complex64 output, no matter what
# the input dtype
ref_result = librosa_stft(x, n_fft, hop_length, win_length, window, center)
@@ -748,7 +747,7 @@
_test((10,), 5, 4, win_sizes=(1, 1), expected_error=RuntimeError)


- @skipCUDAIfRocm
+ @onlyOnCPUAndCUDA
@skipCPUIfNoMkl
@dtypes(torch.double, torch.cdouble)
def test_complex_stft_roundtrip(self, device, dtype):
@@ -790,7 +789,7 @@
length=x.size(-1), **common_kwargs)
self.assertEqual(x_roundtrip, x)

- @skipCUDAIfRocm
+ @onlyOnCPUAndCUDA
@skipCPUIfNoMkl
@dtypes(torch.double, torch.cdouble)
def test_stft_roundtrip_complex_window(self, device, dtype):
@@ -831,6 +830,7 @@
self.assertEqual(x_roundtrip, x)


+ @onlyOnCPUAndCUDA
@skipCUDAIfRocm
@skipCPUIfNoMkl
@dtypes(torch.cdouble)
@@ -851,7 +851,7 @@
actual = torch.stft(*args, window=window, center=False)
self.assertEqual(actual, expected)

- @skipCUDAIfRocm
+ @onlyOnCPUAndCUDA
@skipCPUIfNoMkl
@dtypes(torch.cdouble)
def test_complex_stft_real_equiv(self, device, dtype):
@@ -885,6 +885,7 @@
center=center, normalized=normalized)
self.assertEqual(expected, actual)

+ @onlyOnCPUAndCUDA
@skipCUDAIfRocm
@skipCPUIfNoMkl
@dtypes(torch.cdouble)
@@ -912,6 +913,7 @@
return_complex=True)
self.assertEqual(expected, actual)

+ @onlyOnCPUAndCUDA
@skipCUDAIfRocm
@skipCPUIfNoMkl
def test_complex_stft_onesided(self, device):
@@ -934,12 +936,15 @@
x.stft(10, pad_mode='constant', onesided=True)

# stft is currently warning that it requires return-complex while an upgrader is written
+ @onlyOnCPUAndCUDA
+ @skipCPUIfNoMkl
def test_stft_requires_complex(self, device):
x = torch.rand(100)
y = x.stft(10, pad_mode='constant')
# with self.assertRaisesRegex(RuntimeError, 'stft requires the return_complex parameter'):
# y = x.stft(10, pad_mode='constant')

+ @onlyOnCPUAndCUDA
@skipCUDAIfRocm
@skipCPUIfNoMkl
def test_fft_input_modification(self, device):
@@ -0,0 +1,77 @@
From f4824c2eca26887a0f7aeebb4e966c278258142a Mon Sep 17 00:00:00 2001
From: Ivan Yashchuk <ivan.yashchuk@aalto.fi>
Date: Fri, 12 Mar 2021 17:54:19 +0000
Subject: [PATCH] Fixed worksize

---
aten/src/ATen/native/BatchLinearAlgebra.cpp | 8 ++++----
aten/src/ATen/native/BatchLinearAlgebra.h | 2 +-
aten/src/ATen/native/BatchLinearAlgebraKernel.cpp | 2 +-
3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/aten/src/ATen/native/BatchLinearAlgebra.cpp b/aten/src/ATen/native/BatchLinearAlgebra.cpp
index 39d4291b7a8d..2daf7a93cdea 100644
--- a/aten/src/ATen/native/BatchLinearAlgebra.cpp
+++ b/aten/src/ATen/native/BatchLinearAlgebra.cpp
@@ -695,7 +695,7 @@ static void apply_inverse(Tensor& self, Tensor& infos_lu, Tensor& infos_getri) {
int lwork = -1;
scalar_t wkopt;
lapackGetri<scalar_t>(n, self_data, lda, ipiv_data, &wkopt, lwork, &info);
- lwork = static_cast<int>(real_impl<scalar_t, value_t>(wkopt));
+ lwork = std::max<int>(1, real_impl<scalar_t, value_t>(wkopt));
Tensor work = at::empty({lwork}, self.options());
auto work_data = work.data_ptr<scalar_t>();

@@ -1211,7 +1211,7 @@ static void apply_geqrf(Tensor& self, Tensor& tau, int64_t m, int64_t n,
int lwork = -1;
scalar_t wkopt;
lapackGeqrf<scalar_t>(m, n, self_data, m, tau_data, &wkopt, lwork, &info);
- lwork = static_cast<int>(real_impl<scalar_t, value_t>(wkopt));
+ lwork = std::max<int>(1, real_impl<scalar_t, value_t>(wkopt));
Tensor work = at::empty({lwork}, self.options());

for (const auto i : c10::irange(batch_size)) {
@@ -1626,7 +1626,7 @@ static void apply_symeig(Tensor& self, Tensor& eigvals, bool eigenvectors, bool
}

lapackSymeig<scalar_t, value_t>(jobz, uplo, n, self_data, n, eigvals_data, &wkopt, lwork, rwork_data, &info);
- lwork = static_cast<int>(real_impl<scalar_t, value_t>(wkopt));
+ lwork = std::max<int>(1, real_impl<scalar_t, value_t>(wkopt));
Tensor work = at::empty({lwork}, self.options());

for (const auto i : c10::irange(batch_size)) {
@@ -1782,7 +1782,7 @@ static void apply_svd(Tensor& self, Tensor& U, Tensor& S, Tensor& VT,
int lwork = -1;
scalar_t wkopt;
lapackSvd<scalar_t, value_t>(jobz, m, n, self_data, lda, S_data, U_data, lda, VT_data, ldvt, &wkopt, lwork, rwork_data, iwork_data, &info);
- lwork = static_cast<int>(real_impl<scalar_t, value_t>(wkopt));
+ lwork = std::max<int>(1, real_impl<scalar_t, value_t>(wkopt));
Tensor work = at::empty({lwork}, self.options());
auto work_data = work.data_ptr<scalar_t>();

diff --git a/aten/src/ATen/native/BatchLinearAlgebra.h b/aten/src/ATen/native/BatchLinearAlgebra.h
index 138819f6f4cd..59e71e2964e5 100644
--- a/aten/src/ATen/native/BatchLinearAlgebra.h
+++ b/aten/src/ATen/native/BatchLinearAlgebra.h
@@ -82,7 +82,7 @@ inline void apply_orgqr(Tensor& self, const Tensor& tau, Tensor& infos, int64_t
int lwork = -1;
scalar_t wkopt;
lapackOrgqr<scalar_t>(m, n_columns, k, self_data, lda, tau_data, &wkopt, lwork, &infos_data[0]);
- lwork = static_cast<int>(real_impl<scalar_t, value_t>(wkopt));
+ lwork = std::max<int>(1, real_impl<scalar_t, value_t>(wkopt));
Tensor work = at::empty({lwork}, self.options());

for (int64_t i = 0; i < batch_size; i++) {
diff --git a/aten/src/ATen/native/BatchLinearAlgebraKernel.cpp b/aten/src/ATen/native/BatchLinearAlgebraKernel.cpp
index ef64cef6a771..334f4d60ce44 100644
--- a/aten/src/ATen/native/BatchLinearAlgebraKernel.cpp
+++ b/aten/src/ATen/native/BatchLinearAlgebraKernel.cpp
@@ -115,7 +115,7 @@ void apply_eig(const Tensor& self, bool eigenvectors, Tensor& vals_, Tensor& vec
int info;
lapackEig<scalar_t, value_t>('N', jobvr, n, self_data, n, wr,
nullptr, 1, vecs_data, ldvr, &wkopt, -1, rwork_data, &info);
- int lwork = static_cast<int>(real_impl<scalar_t, value_t>(wkopt));
+ int lwork = std::max<int>(1, real_impl<scalar_t, value_t>(wkopt));

// call again to do the actual work
Tensor work = at::empty({lwork}, self.dtype());
78 changes: 78 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-1.8.1-foss-2020b.eb
@@ -0,0 +1,78 @@
# EasyBuild easyconfig: PyTorch 1.8.1 built with the foss/2020b toolchain
# (CPU build; Python 3.8.6 comes in via the 'Python' dependency below).
name = 'PyTorch'
version = '1.8.1'

homepage = 'https://pytorch.org/'
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
PyTorch is a deep learning framework that puts Python first."""

toolchain = {'name': 'foss', 'version': '2020b'}

# The release tarball is produced locally from a recursive 'git clone' of the
# version tag, so that PyTorch's many git submodules are included.
sources = [{
    'filename': '%(name)s-%(version)s.tar.gz',
    'git_config': {
        'url': 'https://github.com/pytorch',
        'repo_name': 'pytorch',
        'tag': 'v%(version)s',
        'recursive': True,
    },
}]
# NOTE: 'patches' and 'checksums' are positionally matched — keep both lists in
# the same order when adding/removing entries.
patches = [
    'PyTorch-1.6.0_fix-test-dataloader-fixed-affinity.patch',
    'PyTorch-1.7.0_avoid-nan-in-test-torch.patch',
    'PyTorch-1.7.0_increase-distributed-test-timeout.patch',
    'PyTorch-1.7.0_disable-dev-shm-test.patch',
    'PyTorch-1.8.0_fix-noMKL-linear-algebra.patch',
    'PyTorch-1.8.0_correct-skip-tests-decorators.patch',
]
checksums = [
    None,  # can't add proper SHA256 checksum, because source tarball is created locally after recursive 'git clone'
    # PyTorch-1.6.0_fix-test-dataloader-fixed-affinity.patch
    'a4208a46cd2098744daaba96cebb96cd91166f8fc616924315e05974bad80c67',
    'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18',  # PyTorch-1.7.0_avoid-nan-in-test-torch.patch
    # PyTorch-1.7.0_increase-distributed-test-timeout.patch
    '95abb468a35451fbd0f864ca843f6ad15ff8bfb909c3fd580f65859b26c9691c',
    '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a',  # PyTorch-1.7.0_disable-dev-shm-test.patch
    'a1ca9382b0eb333090536633092bab6fa281d26b491bf7b1849117f68ab0730c',  # PyTorch-1.8.0_fix-noMKL-linear-algebra.patch
    # PyTorch-1.8.0_correct-skip-tests-decorators.patch
    '4b2fe7616217dd6fd12d667cb1439dde58f84bf234fbf3e6026c4665fc697a2e',
]

# OS-level package requirement (presumably InfiniBand verbs development files
# for the distributed backends — confirm against the EasyBuild constant).
osdependencies = [OS_PKG_IBVERBS_DEV]

builddependencies = [
    ('CMake', '3.18.4'),
    ('hypothesis', '5.41.5'),  # only needed by the test suite, not at runtime
]

dependencies = [
    ('Ninja', '1.10.1'),  # Required for JIT compilation of C++ extensions
    ('Python', '3.8.6'),
    ('protobuf', '3.14.0'),
    ('protobuf-python', '3.14.0'),
    ('pybind11', '2.6.0'),
    ('SciPy-bundle', '2020.11'),
    ('typing-extensions', '3.7.4.3'),
    ('PyYAML', '5.3.1'),
    ('MPFR', '4.1.0'),
    ('GMP', '6.2.0'),
    ('numactl', '2.0.13'),
    ('FFmpeg', '4.3.1'),
    ('Pillow', '8.0.1'),
]

# Test suites skipped during 'runtest' (keys are build variants; '' = all).
excluded_tests = {
    '': [
        # Tests from this suite frequently time out; the process group RPC backend is deprecated anyway
        'distributed/rpc/test_process_group_agent',
        # Potentially problematic save/load issue with test_lstm (part of this suite) on only some machines.
        # Tell users to verify save & load! See https://github.com/pytorch/pytorch/issues/43209
        'test_quantization',
    ]
}

# %(python)s and %(excluded_tests)s are filled in by the PyTorch easyblock.
runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --verbose %(excluded_tests)s'

# Importing caffe2.python exercises the compiled extension modules.
sanity_check_commands = ["python -c 'import caffe2.python'"]
tests = ['PyTorch-check-cpp-extension.py']

moduleclass = 'devel'