Skip to content

Commit

Permalink
Skip failing elastic torch tests on GPU
Browse files Browse the repository at this point in the history
Signed-off-by: Enrico Minack <github@enrico.minack.dev>
  • Loading branch information
tgaddair authored and EnricoMi committed May 16, 2021
1 parent 85cfc6b commit acf8994
Showing 1 changed file with 18 additions and 1 deletion.
19 changes: 18 additions & 1 deletion test/integration/test_elastic_torch.py
Expand Up @@ -13,11 +13,14 @@
# limitations under the License.
# ==============================================================================

import mock
import os
import unittest
import warnings
from distutils.version import LooseVersion

import mock

import torch
from elastic_common import BaseElasticTests


Expand All @@ -37,3 +40,17 @@ def test_all_hosts_blacklisted(self, mock_get_min_start_hosts):
@mock.patch('horovod.runner.gloo_run._get_min_start_hosts', return_value=1)
def test_min_hosts_timeout(self, mock_get_min_start_hosts):
self.skipTest('This test fails due to https://github.com/horovod/horovod/issues/2030')

@mock.patch('horovod.runner.elastic.driver.DISCOVER_HOSTS_FREQUENCY_SECS', 0.01)
@mock.patch('horovod.runner.gloo_run._get_min_start_hosts', return_value=1)
def test_fault_tolerance_without_scaling(self, mock_get_min_start_hosts):
    """Run the inherited test, skipping it when CUDA is available and torch >= 1.9.0.

    The skip is tracked in https://github.com/horovod/horovod/issues/2908.
    """
    # Check for CUDA first so the version strings are only parsed on GPU hosts.
    if torch.cuda.is_available():
        installed = LooseVersion(torch.__version__)
        threshold = LooseVersion('1.9.0')
        if installed >= threshold:
            self.skipTest('This test fails due to https://github.com/horovod/horovod/issues/2908')

    # Not skipped: delegate to the base-class implementation with the same mock.
    inherited = super(ElasticTorchTests, self).test_fault_tolerance_without_scaling
    inherited(mock_get_min_start_hosts)

@mock.patch('horovod.runner.elastic.driver.DISCOVER_HOSTS_FREQUENCY_SECS', 0.01)
@mock.patch('horovod.runner.gloo_run._get_min_start_hosts', return_value=1)
def test_single_rank_failure(self, mock_get_min_start_hosts):
    """Run the inherited test, skipping it when CUDA is available and torch >= 1.9.0.

    The skip is tracked in https://github.com/horovod/horovod/issues/2908.
    """
    # Check for CUDA first so the version strings are only parsed on GPU hosts.
    if torch.cuda.is_available():
        installed = LooseVersion(torch.__version__)
        threshold = LooseVersion('1.9.0')
        if installed >= threshold:
            self.skipTest('This test fails due to https://github.com/horovod/horovod/issues/2908')

    # Not skipped: delegate to the base-class implementation with the same mock.
    inherited = super(ElasticTorchTests, self).test_single_rank_failure
    inherited(mock_get_min_start_hosts)

0 comments on commit acf8994

Please sign in to comment.