-
Notifications
You must be signed in to change notification settings - Fork 685
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding easyconfigs: PyTorch-2.1.2-foss-2022b-CUDA-12.0.0.eb and patch…
…es: PyTorch-2.1.2_add-cuda-skip-markers.patch, PyTorch-2.1.2_fix-conj-mismatch-test-failures.patch, PyTorch-2.1.2_fix-device-mesh-check.patch, PyTorch-2.1.2_fix-locale-issue-in-nvrtcCompileProgram.patch, PyTorch-2.1.2_fix-test_extension_backend-without-vectorization.patch, PyTorch-2.1.2_fix-test_memory_profiler.patch, PyTorch-2.1.2_fix-test_torchinductor-rounding.patch, PyTorch-2.1.2_fix-vsx-vector-abs.patch, PyTorch-2.1.2_fix-vsx-vector-div.patch, PyTorch-2.1.2_fix-with_temp_dir-decorator.patch, PyTorch-2.1.2_fix-wrong-device-mesh-size-in-tests.patch, PyTorch-2.1.2_relax-cuda-tolerances.patch, PyTorch-2.1.2_remove-nccl-backend-default-without-gpus.patch, PyTorch-2.1.2_skip-cpu_repro-test-without-vectorization.patch, PyTorch-2.1.2_skip-failing-test_dtensor_ops-subtests.patch, PyTorch-2.1.2_skip-test_fsdp_tp_checkpoint_integration.patch, PyTorch-2.1.2_workaround_dynamo_failure_without_nnpack.patch
- Loading branch information
Showing
1 changed file
with
235 additions
and
0 deletions.
There are no files selected for viewing
235 changes: 235 additions & 0 deletions
235
easybuild/easyconfigs/p/PyTorch/PyTorch-2.1.2-foss-2022b-CUDA-12.0.0.eb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,235 @@ | ||
name = 'PyTorch' | ||
version = '2.1.2' | ||
versionsuffix = '-CUDA-%(cudaver)s' | ||
|
||
homepage = 'https://pytorch.org/' | ||
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration. | ||
PyTorch is a deep learning framework that puts Python first.""" | ||
|
||
toolchain = {'name': 'foss', 'version': '2022b'} | ||
|
||
source_urls = [GITHUB_RELEASE] | ||
sources = ['%(namelower)s-v%(version)s.tar.gz'] | ||
patches = [ | ||
'PyTorch-1.7.0_disable-dev-shm-test.patch', | ||
'PyTorch-1.11.1_skip-test_init_from_local_shards.patch', | ||
'PyTorch-1.12.1_add-hypothesis-suppression.patch', | ||
'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch', | ||
'PyTorch-1.12.1_fix-TestTorch.test_to.patch', | ||
'PyTorch-1.12.1_skip-test_round_robin.patch', | ||
'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch', | ||
'PyTorch-1.13.1_fix-protobuf-dependency.patch', | ||
'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch', | ||
'PyTorch-1.13.1_skip-failing-singular-grad-test.patch', | ||
'PyTorch-1.13.1_skip-tests-without-fbgemm.patch', | ||
'PyTorch-2.0.1_avoid-test_quantization-failures.patch', | ||
'PyTorch-2.0.1_fix-skip-decorators.patch', | ||
'PyTorch-2.0.1_fix-ub-in-inductor-codegen.patch', | ||
'PyTorch-2.0.1_fix-vsx-loadu.patch', | ||
'PyTorch-2.0.1_no-cuda-stubs-rpath.patch', | ||
'PyTorch-2.0.1_skip-failing-gradtest.patch', | ||
'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch', | ||
'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch', | ||
'PyTorch-2.1.0_disable-gcc12-warning.patch', | ||
'PyTorch-2.1.0_disable-cudnn-tf32-for-too-strict-tests.patch', | ||
'PyTorch-2.1.0_fix-bufferoverflow-in-oneDNN.patch', | ||
'PyTorch-2.1.0_fix-test_numpy_torch_operators.patch', | ||
'PyTorch-2.1.0_fix-validationError-output-test.patch', | ||
'PyTorch-2.1.0_fix-vsx-vector-shift-functions.patch', | ||
'PyTorch-2.1.0_increase-tolerance-functorch-test_vmapvjpvjp.patch', | ||
'PyTorch-2.1.0_remove-sparse-csr-nnz-overflow-test.patch', | ||
'PyTorch-2.1.0_remove-test-requiring-online-access.patch', | ||
'PyTorch-2.1.0_skip-diff-test-on-ppc.patch', | ||
'PyTorch-2.1.0_skip-dynamo-test_predispatch.patch', | ||
'PyTorch-2.1.0_skip-test_jvp_linalg_det_singular.patch', | ||
'PyTorch-2.1.0_skip-test_linear_fp32-without-MKL.patch', | ||
'PyTorch-2.1.0_skip-test_wrap_bad.patch', | ||
'PyTorch-2.1.2_add-cuda-skip-markers.patch', | ||
'PyTorch-2.1.2_fix-conj-mismatch-test-failures.patch', | ||
'PyTorch-2.1.2_fix-device-mesh-check.patch', | ||
'PyTorch-2.1.2_fix-locale-issue-in-nvrtcCompileProgram.patch', | ||
'PyTorch-2.1.2_fix-test_extension_backend-without-vectorization.patch', | ||
'PyTorch-2.1.2_fix-test_memory_profiler.patch', | ||
'PyTorch-2.1.2_fix-test_torchinductor-rounding.patch', | ||
'PyTorch-2.1.2_fix-vsx-vector-abs.patch', | ||
'PyTorch-2.1.2_fix-vsx-vector-div.patch', | ||
'PyTorch-2.1.2_fix-with_temp_dir-decorator.patch', | ||
'PyTorch-2.1.2_fix-wrong-device-mesh-size-in-tests.patch', | ||
'PyTorch-2.1.2_relax-cuda-tolerances.patch', | ||
'PyTorch-2.1.2_remove-nccl-backend-default-without-gpus.patch', | ||
'PyTorch-2.1.2_skip-cpu_repro-test-without-vectorization.patch', | ||
'PyTorch-2.1.2_skip-failing-test_dtensor_ops-subtests.patch', | ||
'PyTorch-2.1.2_skip-test_fsdp_tp_checkpoint_integration.patch', | ||
'PyTorch-2.1.2_workaround_dynamo_failure_without_nnpack.patch', | ||
] | ||
checksums = [ | ||
{'pytorch-v2.1.2.tar.gz': '85effbcce037bffa290aea775c9a4bad5f769cb229583450c40055501ee1acd7'}, | ||
{'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'}, | ||
{'PyTorch-1.11.1_skip-test_init_from_local_shards.patch': | ||
'4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'}, | ||
{'PyTorch-1.12.1_add-hypothesis-suppression.patch': | ||
'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'}, | ||
{'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch': | ||
'1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'}, | ||
{'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'}, | ||
{'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'}, | ||
{'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch': | ||
'5c7be91a6096083a0b1315efe0001537499c600f1f569953c6a2c7f4cc1d0910'}, | ||
{'PyTorch-1.13.1_fix-protobuf-dependency.patch': | ||
'8bd755a0cab7233a243bc65ca57c9630dfccdc9bf8c9792f0de4e07a644fcb00'}, | ||
{'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch': | ||
'bdde0f2105215c95a54de64ec4b1a4520528510663174fef6d5b900eb1db3937'}, | ||
{'PyTorch-1.13.1_skip-failing-singular-grad-test.patch': | ||
'72688a57b2bb617665ad1a1d5e362c5111ae912c10936bb38a089c0204729f48'}, | ||
{'PyTorch-1.13.1_skip-tests-without-fbgemm.patch': | ||
'481e595f673baf8ae58b41697a6792b83048b0264aa79b422f48cd8c22948bb7'}, | ||
{'PyTorch-2.0.1_avoid-test_quantization-failures.patch': | ||
'02e3f47e4ed1d7d6077e26f1ae50073dc2b20426269930b505f4aefe5d2f33cd'}, | ||
{'PyTorch-2.0.1_fix-skip-decorators.patch': '2039012cef45446065e1a2097839fe20bb29fe3c1dcc926c3695ebf29832e920'}, | ||
{'PyTorch-2.0.1_fix-ub-in-inductor-codegen.patch': | ||
'1b37194f55ae678f3657b8728dfb896c18ffe8babe90987ce468c4fa9274f357'}, | ||
{'PyTorch-2.0.1_fix-vsx-loadu.patch': 'a0ffa61da2d47c6acd09aaf6d4791e527d8919a6f4f1aa7ed38454cdcadb1f72'}, | ||
{'PyTorch-2.0.1_no-cuda-stubs-rpath.patch': '8902e58a762240f24cdbf0182e99ccdfc2a93492869352fcb4ca0ec7e407f83a'}, | ||
{'PyTorch-2.0.1_skip-failing-gradtest.patch': '8030bdec6ba49b057ab232d19a7f1a5e542e47e2ec340653a246ec9ed59f8bc1'}, | ||
{'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch': | ||
'7047862abc1abaff62954da59700f36d4f39fcf83167a638183b1b7f8fec78ae'}, | ||
{'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch': | ||
'166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'}, | ||
{'PyTorch-2.1.0_disable-gcc12-warning.patch': 'c858b8db0010f41005dc06f9a50768d0d3dc2d2d499ccbdd5faf8a518869a421'}, | ||
{'PyTorch-2.1.0_disable-cudnn-tf32-for-too-strict-tests.patch': | ||
'd895018ebdfd46e65d9f7645444a3b4c5bbfe3d533a08db559a04be34e01e478'}, | ||
{'PyTorch-2.1.0_fix-bufferoverflow-in-oneDNN.patch': | ||
'b15b1291a3c37bf6a4982cfbb3483f693acb46a67bc0912b383fd98baf540ccf'}, | ||
{'PyTorch-2.1.0_fix-test_numpy_torch_operators.patch': | ||
'84bb51a719abc677031a7a3dfe4382ff098b0cbd8b39b8bed2a7fa03f80ac1e9'}, | ||
{'PyTorch-2.1.0_fix-validationError-output-test.patch': | ||
'7eba0942afb121ed92fac30d1529447d892a89eb3d53c565f8e9d480e95f692b'}, | ||
{'PyTorch-2.1.0_fix-vsx-vector-shift-functions.patch': | ||
'3793b4b878be1abe7791efcbd534774b87862cfe7dc4774ca8729b6cabb39e7e'}, | ||
{'PyTorch-2.1.0_increase-tolerance-functorch-test_vmapvjpvjp.patch': | ||
'aef38adf1210d0c5455e91d7c7a9d9e5caad3ae568301e0ba9fc204309438e7b'}, | ||
{'PyTorch-2.1.0_remove-sparse-csr-nnz-overflow-test.patch': | ||
'0ac36411e76506b3354c85a8a1260987f66af947ee52ffc64230aee1fa02ea8b'}, | ||
{'PyTorch-2.1.0_remove-test-requiring-online-access.patch': | ||
'35184b8c5a1b10f79e511cc25db3b8a5585a5d58b5d1aa25dd3d250200b14fd7'}, | ||
{'PyTorch-2.1.0_skip-diff-test-on-ppc.patch': '394157dbe565ffcbc1821cd63d05930957412156cc01e949ef3d3524176a1dda'}, | ||
{'PyTorch-2.1.0_skip-dynamo-test_predispatch.patch': | ||
'6298daf9ddaa8542850eee9ea005f28594ab65b1f87af43d8aeca1579a8c4354'}, | ||
{'PyTorch-2.1.0_skip-test_jvp_linalg_det_singular.patch': | ||
'5229ca88a71db7667a90ddc0b809b2c817698bd6e9c5aaabd73d3173cf9b99fe'}, | ||
{'PyTorch-2.1.0_skip-test_linear_fp32-without-MKL.patch': | ||
'5dcc79883b6e3ec0a281a8e110db5e0a5880de843bb05653589891f16473ead5'}, | ||
{'PyTorch-2.1.0_skip-test_wrap_bad.patch': 'b8583125ee94e553b6f77c4ab4bfa812b89416175dc7e9b7390919f3b485cb63'}, | ||
{'PyTorch-2.1.2_add-cuda-skip-markers.patch': 'd007d6d0cdb533e7d01f503e9055218760123a67c1841c57585385144be18c9a'}, | ||
{'PyTorch-2.1.2_fix-conj-mismatch-test-failures.patch': | ||
'c164357efa4ce88095376e590ba508fc1daa87161e1e59544eda56daac7f2847'}, | ||
{'PyTorch-2.1.2_fix-device-mesh-check.patch': 'c0efc288bf3d9a9a3c8bbd2691348a589a2677ea43880a8c987db91c8de4806b'}, | ||
{'PyTorch-2.1.2_fix-locale-issue-in-nvrtcCompileProgram.patch': | ||
'f7adafb4e4d3b724b93237a259797b6ed6f535f83be0e34a7b759c71c6a8ddf2'}, | ||
{'PyTorch-2.1.2_fix-test_extension_backend-without-vectorization.patch': | ||
'cd1455495886a7d6b2d30d48736eb0103fded21e2e36de6baac719b9c52a1c92'}, | ||
{'PyTorch-2.1.2_fix-test_memory_profiler.patch': | ||
'30b0c9355636c0ab3dedae02399789053825dc3835b4d7dac6e696767772b1ce'}, | ||
{'PyTorch-2.1.2_fix-test_torchinductor-rounding.patch': | ||
'a0ef99192ee2ad1509c78a8377023d5be2b5fddb16f84063b7c9a0b53d979090'}, | ||
{'PyTorch-2.1.2_fix-vsx-vector-abs.patch': 'd67d32407faed7dc1dbab4bba0e2f7de36c3db04560ced35c94caf8d84ade886'}, | ||
{'PyTorch-2.1.2_fix-vsx-vector-div.patch': '11f497a6892eb49b249a15320e4218e0d7ac8ae4ce67de39e4a018a064ca1acc'}, | ||
{'PyTorch-2.1.2_fix-with_temp_dir-decorator.patch': | ||
'90bd001e034095329277d70c6facc4026b4ce6d7f8b8d6aa81c0176eeb462eb1'}, | ||
{'PyTorch-2.1.2_fix-wrong-device-mesh-size-in-tests.patch': | ||
'07a5e4233d02fb6348872838f4d69573c777899c6f0ea4e39ae23c08660d41e5'}, | ||
{'PyTorch-2.1.2_relax-cuda-tolerances.patch': '554ad09787f61080fafdb84216e711e32327aa357e2a9c40bb428eb6503dee6e'}, | ||
{'PyTorch-2.1.2_remove-nccl-backend-default-without-gpus.patch': | ||
'e6a1efe3d127fcbf4723476a7a1c01cfcf2ccb16d1fb250f478192623e8b6a15'}, | ||
{'PyTorch-2.1.2_skip-cpu_repro-test-without-vectorization.patch': | ||
'7ace835af60c58d9e0754a34c19d4b9a0c3a531f19e5d0eba8e2e49206eaa7eb'}, | ||
{'PyTorch-2.1.2_skip-failing-test_dtensor_ops-subtests.patch': | ||
'6cf711bf26518550903b09ed4431de9319791e79d61aab065785d6608fd5cc88'}, | ||
{'PyTorch-2.1.2_skip-test_fsdp_tp_checkpoint_integration.patch': | ||
'943ee92f5fd518f608a59e43fe426b9bb45d7e7ad0ba04639e516db2d61fa57d'}, | ||
{'PyTorch-2.1.2_workaround_dynamo_failure_without_nnpack.patch': | ||
'fb96eefabf394617bbb3fbd3a7a7c1aa5991b3836edc2e5d2a30e708bfe49ba1'}, | ||
] | ||
|
||
osdependencies = [OS_PKG_IBVERBS_DEV] | ||
|
||
builddependencies = [ | ||
('CMake', '3.24.3'), | ||
('hypothesis', '6.68.2'), | ||
# For tests | ||
('pytest-flakefinder', '1.1.0'), | ||
('pytest-rerunfailures', '12.0'), | ||
('pytest-shard', '0.1.2'), | ||
] | ||
|
||
dependencies = [ | ||
('CUDA', '12.0.0', '', SYSTEM), | ||
('cuDNN', '8.8.0.121', '-CUDA-%(cudaver)s', SYSTEM), | ||
('magma', '2.7.1', '-CUDA-%(cudaver)s'), | ||
('NCCL', '2.16.2', '-CUDA-%(cudaver)s'), | ||
('Ninja', '1.11.1'), # Required for JIT compilation of C++ extensions | ||
('Python', '3.10.8'), | ||
('protobuf', '23.0'), | ||
('protobuf-python', '4.23.0'), | ||
('pybind11', '2.10.3'), | ||
('SciPy-bundle', '2023.02'), | ||
('PyYAML', '6.0'), | ||
('MPFR', '4.2.0'), | ||
('GMP', '6.2.1'), | ||
('numactl', '2.0.16'), | ||
('FFmpeg', '5.1.2'), | ||
('Pillow', '9.4.0'), | ||
('expecttest', '0.1.3'), | ||
('networkx', '3.0'), | ||
('sympy', '1.12'), | ||
('Z3', '4.12.2', '-Python-%(pyver)s'), | ||
] | ||
|
||
use_pip = True | ||
buildcmd = '%(python)s setup.py build' # Run the (long) build in the build step | ||
|
||
excluded_tests = { | ||
'': [ | ||
# This test seems to take too long on NVIDIA Ampere at least. | ||
'distributed/test_distributed_spawn', | ||
# Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375 | ||
'distributions/test_constraints', | ||
# no xdoctest | ||
'doctests', | ||
# failing on broadwell | ||
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712 | ||
'test_native_mha', | ||
# intermittent failures on various systems | ||
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712 | ||
'distributed/rpc/test_tensorpipe_agent', | ||
# Broken test, can't ever succeed, see https://github.com/pytorch/pytorch/issues/122184 | ||
'distributed/tensor/parallel/test_tp_random_state', | ||
# failures on OmniPath systems, which don't support some optional InfiniBand features | ||
# See https://github.com/pytorch/tensorpipe/issues/413 | ||
'distributed/pipeline/sync/skip/test_gpipe', | ||
'distributed/pipeline/sync/skip/test_leak', | ||
'distributed/pipeline/sync/test_bugs', | ||
'distributed/pipeline/sync/test_inplace', | ||
'distributed/pipeline/sync/test_pipe', | ||
'distributed/pipeline/sync/test_transparency', | ||
] | ||
} | ||
|
||
runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s' | ||
|
||
# Especially test_quantization has a few corner cases that are triggered by the random input values, | ||
# those cannot be easily avoided, see https://github.com/pytorch/pytorch/issues/107030 | ||
# test_nn is also prone to spurious failures: https://github.com/pytorch/pytorch/issues/118294 | ||
# So allow a low number of tests to fail as the tests "usually" succeed | ||
max_failed_tests = 10 | ||
|
||
# The readelf sanity check command can be taken out once the TestRPATH test from | ||
# https://github.com/pytorch/pytorch/pull/109493 is accepted, since it is then checked as part of the PyTorch test suite | ||
local_libcaffe2 = "$EBROOTPYTORCH/lib/python%%(pyshortver)s/site-packages/torch/lib/libcaffe2_nvrtc.%s" % SHLIB_EXT | ||
sanity_check_commands = [ | ||
"readelf -d %s | egrep 'RPATH|RUNPATH' | grep -v stubs" % local_libcaffe2, | ||
] | ||
|
||
tests = ['PyTorch-check-cpp-extension.py'] | ||
|
||
moduleclass = 'ai' |