Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

{devel}[foss/2021a] PyTorch v1.10.0 w/ Python 3.9.5 (CPU-only) #14460

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
112 changes: 112 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-1.10.0-foss-2021a.eb
@@ -0,0 +1,112 @@
name = 'PyTorch'
version = '1.10.0'

homepage = 'https://pytorch.org/'
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
PyTorch is a deep learning framework that puts Python first."""

toolchain = {'name': 'foss', 'version': '2021a'}

sources = [{
'filename': '%(name)s-%(version)s.tar.gz',
'git_config': {
'url': 'https://github.com/pytorch',
'repo_name': 'pytorch',
'tag': 'v%(version)s',
'recursive': True,
},
}]
patches = [
'PyTorch-1.7.0_avoid-nan-in-test-torch.patch',
'PyTorch-1.7.0_disable-dev-shm-test.patch',
'PyTorch-1.7.1_correctly-pass-jit_opt_level.patch',
'PyTorch-1.8.1_increase-distributed-test-timeout.patch',
'PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch',
'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch',
'PyTorch-1.10.0_fix-alias-violation-in-bitwise-ops.patch',
'PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch',
'PyTorch-1.10.0_fix-test-cond-cpu.patch',
'PyTorch-1.10.0_fix-vnni-detection.patch',
'PyTorch-1.10.0_increase_zero_optimizer_test_tolerance.patch',
'PyTorch-1.10.0_skip_failing_ops_tests.patch',
'PyTorch-1.10.0_skip_nan_tests_openblas.patch',
]
checksums = [
None, # can't add proper SHA256 checksum, because source tarball is created locally after recursive 'git clone'
'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18', # PyTorch-1.7.0_avoid-nan-in-test-torch.patch
'622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a', # PyTorch-1.7.0_disable-dev-shm-test.patch
# PyTorch-1.7.1_correctly-pass-jit_opt_level.patch
'd4d967d47f8a6172fcbf57f0a61835482968850967c4fdb01108b720696a988d',
# PyTorch-1.8.1_increase-distributed-test-timeout.patch
'7a6e512274f0b8673f4f207a5bc53387d88be7e79833f42d20365668b2118071',
# PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch
'ff573660913ce055e24cfd194ce747ba5685091c631cfd443eae2a99d56b57ea',
# PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch
'313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707',
# PyTorch-1.10.0_fix-alias-violation-in-bitwise-ops.patch
'426c9ead1a74b656748d4c8bf8afd4303d8b9f2394ad22b21a845d07c8ca1d12',
# PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch
'67152215e4530a9b1d7349fb20864445fd815288f04ab9e96e45c73b2d87827a',
# PyTorch-1.10.0_fix-test-cond-cpu.patch
'51f83f5d5ef69656ef35b73f17e0671e70113798421be11ea4c7b56ffcc4da03',
# PyTorch-1.10.0_fix-vnni-detection.patch
'1f3664c0febfa2a3fc4c0cd3bae185f289716ac0b6c3d7e8fa1cee19ba62b7cc',
# PyTorch-1.10.0_increase_zero_optimizer_test_tolerance.patch
'e65afb01786f7f030ccb5faada1eb474bb0c418bcadcf1baaa71a4fa2f3f4240',
# PyTorch-1.10.0_skip_failing_ops_tests.patch
'399af94ffcef4a6db5226552c46f11e9b0f0f371b2d7924b9e5764d2281581ab',
# PyTorch-1.10.0_skip_nan_tests_openblas.patch
'7d3f83e3056d9e47a460790313238f28708beb596cafaa7ae55e374d368bbedf',
]

osdependencies = [OS_PKG_IBVERBS_DEV]

builddependencies = [
('CMake', '3.20.1'),
('hypothesis', '6.13.1'),
]

dependencies = [
('Ninja', '1.10.2'), # Required for JIT compilation of C++ extensions
('Python', '3.9.5'),
('protobuf', '3.17.3'),
('protobuf-python', '3.17.3'),
('pybind11', '2.6.2'),
('SciPy-bundle', '2021.05'),
('typing-extensions', '3.10.0.0'),
('PyYAML', '5.4.1'),
('MPFR', '4.1.0'),
('GMP', '6.2.1'),
('numactl', '2.0.14'),
('FFmpeg', '4.3.2'),
('Pillow', '8.2.0'),
('expecttest', '0.1.3'),
]

# default CUDA compute capabilities to use (override via --cuda-compute-capabilities)
cuda_compute_capabilities = ['3.5', '3.7', '5.2', '6.0', '6.1', '7.0', '7.2', '7.5', '8.0', '8.6']

custom_opts = ["USE_CUPTI_SO=1"]

excluded_tests = {
'': [
# Bad tests: https://github.com/pytorch/pytorch/issues/60260
'distributed/elastic/utils/distributed_test',
'distributed/elastic/multiprocessing/api_test',
# These tests fail on A10s at the very least, they time out forever no matter how long the timeout is.
# Possibly related to NCCL 2.8.3: https://docs.nvidia.com/deeplearning/nccl/release-notes/rel_2-8-3.html
# 'distributed/test_distributed_fork',
'distributed/test_distributed_spawn',
# Fails on A10s: https://github.com/pytorch/pytorch/issues/63079
'test_optim',
# Test from this suite timeout often. The process group backend is deprecated anyway
# 'distributed/rpc/test_process_group_agent',
]
}

runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s'

sanity_check_commands = ["python -c 'import caffe2.python'"]
tests = ['PyTorch-check-cpp-extension.py']

moduleclass = 'devel'