diff --git a/horovod/common/ops/adasum/adasum_mpi.cc b/horovod/common/ops/adasum/adasum_mpi.cc
index 5eb15995a3..dd85aadcf6 100644
--- a/horovod/common/ops/adasum/adasum_mpi.cc
+++ b/horovod/common/ops/adasum/adasum_mpi.cc
@@ -28,8 +28,8 @@ AdasumMPI::~AdasumMPI() {
 
 void AdasumMPI::InitializeVHDDReductionComms() {
   int rank, size;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  MPI_Comm_rank(mpi_context_->mpi_comm, &rank);
+  MPI_Comm_size(mpi_context_->mpi_comm, &size);
 
   // Initialize communication groups for the vector halving, distance doubling
   // (VHDD) Adasum reduction. These are used in computing dot products and
@@ -38,10 +38,10 @@ void AdasumMPI::InitializeVHDDReductionComms() {
   // includes two elements: this rank and it's first VHDD neighbor. The
   // subsequent groups grow to include any ranks the previous group
   // communicates with. Thus the sizes of the groups are 2,4,8... up to the
-  // size of MPI_COMM_WORLD. In essence, a reduction group includes all nodes
+  // size of mpi_context->mpi_comm. In essence, a reduction group includes all nodes
   // that a tensor may be split across.
   MPI_Group world_group;
-  MPI_Comm_group(MPI_COMM_WORLD, &world_group);
+  MPI_Comm_group(mpi_context_->mpi_comm, &world_group);
   int nearest_power_2 = 1;
   int log_size;
   for (nearest_power_2 = 1, log_size = 0; (nearest_power_2 << 1) <= size;
@@ -59,7 +59,7 @@ void AdasumMPI::InitializeVHDDReductionComms() {
     }
     MPI_Group red_group;
     MPI_Group_incl(world_group, (level << 1), node_rank, &red_group);
-    MPI_Comm_create_group(MPI_COMM_WORLD, red_group, 0,
+    MPI_Comm_create_group(mpi_context_->mpi_comm, red_group, 0,
                           &reduction_comms_[shift_val - 1]);
     MPI_Group_free(&red_group);
   }
diff --git a/test/parallel/test_torch.py b/test/parallel/test_torch.py
index dac51959a9..e0dcc32742 100644
--- a/test/parallel/test_torch.py
+++ b/test/parallel/test_torch.py
@@ -43,7 +43,6 @@
 from common import mpi_env_rank_and_size, skip_or_fail_gpu_test, temppath
 
 _1_5_api = LooseVersion(torch.__version__) >= LooseVersion('1.5.0')
-_1_10_api = LooseVersion(torch.__version__) >= LooseVersion('1.10.0')
 _is_mac = platform.system() == 'Darwin'
 
 ccl_supported_types = set([torch.ByteTensor, torch.CharTensor, torch.ShortTensor,
@@ -2540,12 +2539,6 @@ def forward(self, x):
 
     def test_delta_optimizer(self):
         """Test that delta optimizer."""
-        if _1_10_api:
-            # On PyTorch 1.10, if this test is not skipped and when tests are run in alphabetical order, the later
-            # test_dynamic_requires_grad can run into a deadlock.
-            # TODO: Understand and fix the root cause of these deadlocks.
-            self.skipTest("Deadlocks with PyTorch 1.10")
-
         hvd.init()
         if not hvd.mpi_enabled():
             # TODO support non-MPI Adasum operation
@@ -2583,6 +2576,7 @@ def forward(self, x):
             opt.zero_grad()
             loss.backward()
             opt.step()
+            hvd.barrier()
 
     def test_duplicate_names(self):
         """Test that passing duplicate names to optimizer will fail."""
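
The adasum_mpi.cc hunks above replace the hard-coded MPI_COMM_WORLD with Horovod's own communicator (mpi_context_->mpi_comm) when building the VHDD reduction groups of sizes 2, 4, 8, ... For reference, below is a standalone sketch, not Horovod source, of how that family of power-of-two communicators can be derived from a caller-supplied communicator. The helper name BuildVHDDComms is made up for illustration, the sketch assumes the communicator size is a power of two, and the demo passes MPI_COMM_WORLD purely as a stand-in for whatever communicator the caller provides.

// Standalone sketch: build power-of-two "reduction" communicators from an
// arbitrary base communicator, in the spirit of the patched
// AdasumMPI::InitializeVHDDReductionComms(). Group k (k = 1, 2, ...) contains
// the 2^k ranks that share this rank's high-order rank bits.
#include <mpi.h>
#include <cstdio>
#include <vector>

// Returns one communicator per group size 2, 4, ..., size.
// Caller owns the communicators and must MPI_Comm_free them.
std::vector<MPI_Comm> BuildVHDDComms(MPI_Comm comm) {
  int rank, size;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &size);

  std::vector<MPI_Comm> comms;
  // Assumption for this sketch only: size is a power of two.
  if (size < 2 || (size & (size - 1)) != 0) {
    return comms;
  }

  MPI_Group base_group;
  MPI_Comm_group(comm, &base_group);

  std::vector<int> members(size);
  // level doubles each iteration, so group sizes are 2, 4, 8, ...
  for (int level = 1, shift = 1; (level << 1) <= size;
       level <<= 1, ++shift) {
    int group_size = level << 1;
    int base_rank = (rank >> shift) << shift;  // lowest rank in my group
    for (int i = 0; i < group_size; ++i) {
      members[i] = base_rank + i;
    }
    MPI_Group red_group;
    MPI_Group_incl(base_group, group_size, members.data(), &red_group);
    MPI_Comm red_comm;
    // Collective only over the group's members; tag 0 as in the patch.
    MPI_Comm_create_group(comm, red_group, 0, &red_comm);
    MPI_Group_free(&red_group);
    comms.push_back(red_comm);
  }
  MPI_Group_free(&base_group);
  return comms;
}

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  // Any communicator works here; the patch passes Horovod's communicator.
  std::vector<MPI_Comm> comms = BuildVHDDComms(MPI_COMM_WORLD);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  std::printf("rank %d belongs to %zu reduction comms\n", rank, comms.size());
  for (MPI_Comm& c : comms) {
    if (c != MPI_COMM_NULL) MPI_Comm_free(&c);
  }
  MPI_Finalize();
  return 0;
}

Taking the communicator as a parameter, as the patch does, presumably lets the reduction groups follow whatever communicator Horovod was initialized with rather than always spanning MPI_COMM_WORLD.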