From f010669674edc9c6073c1b340d1effaf29d44c25 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 22 Sep 2022 19:58:48 +0000 Subject: [PATCH] Increase timeout for ProcessGroupGlooTest (#85474) We see spurious failures due to timeouts in `test_allreduce_coalesced_basics` but only when running the whole test suite with `python run_test.py --verbose -i distributed/test_c10d_gloo`. Increasing the timeout to 50s should provide enough leeway to avoid this. Note that the default for the `_timeout` is 30 minutes. Originally reported in EasyBuild at https://github.com/easybuilders/easybuild-easyconfigs/pull/15137#issuecomment-1073809305 and patch proposed by @casparvl Pull Request resolved: https://github.com/pytorch/pytorch/pull/85474 Approved by: https://github.com/rohan-varma --- test/distributed/test_c10d_gloo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/distributed/test_c10d_gloo.py b/test/distributed/test_c10d_gloo.py index 8fb2a8b32433..0084ca58348f 100644 --- a/test/distributed/test_c10d_gloo.py +++ b/test/distributed/test_c10d_gloo.py @@ -215,7 +215,7 @@ def setUp(self): def opts(self, threads=2): opts = c10d.ProcessGroupGloo._Options() - opts._timeout = 5.0 + opts._timeout = 50.0 opts._devices = [create_device(interface=LOOPBACK)] opts._threads = threads return opts