
Commit

Address review comments.
Signed-off-by: Josh Romero <joshr@nvidia.com>
romerojosh committed Nov 19, 2020
1 parent 5d24fff commit 9b3ce49
Showing 6 changed files with 18 additions and 45 deletions.
31 changes: 0 additions & 31 deletions horovod/common/group_table.cc
@@ -40,27 +40,6 @@ bool GroupTable::empty(void) const {
return tensor_name_to_id_.empty();
}

int32_t GroupTable::RegisterGroup(const std::vector<std::string>& tensor_names) {
std::lock_guard<std::mutex> guard(mutex_);

int32_t group_id;
if (!free_ids_.empty()) {
// Reuse old group_id
group_id = free_ids_.front();
free_ids_.pop();
} else {
// Create a new group_id
group_id = next_group_id_++;
}

for (auto& name : tensor_names) {
tensor_name_to_id_[name] = group_id;
}
id_to_tensor_names_[group_id] = tensor_names;

return group_id;
}

int32_t GroupTable::RegisterGroup(std::vector<std::string>&& tensor_names) {
std::lock_guard<std::mutex> guard(mutex_);

@@ -82,16 +61,6 @@ int32_t GroupTable::RegisterGroup(std::vector<std::string>&& tensor_names) {
return group_id;
}

void GroupTable::DeregisterGroup(int32_t group_id) {
std::lock_guard<std::mutex> guard(mutex_);
for (auto& entry : id_to_tensor_names_[group_id]) {
tensor_name_to_id_.erase(entry);
}
id_to_tensor_names_.erase(group_id);

free_ids_.push(group_id);
}

void GroupTable::DeregisterGroups(const std::vector<std::string>& tensor_names) {
std::lock_guard<std::mutex> guard(mutex_);

2 changes: 0 additions & 2 deletions horovod/common/group_table.h
@@ -36,9 +36,7 @@ class GroupTable {
const std::vector<std::string>& GetGroupTensorNames(int32_t group_id) const;
bool empty(void) const;

int32_t RegisterGroup(const std::vector<std::string>& tensor_names);
int32_t RegisterGroup(std::vector<std::string>&& tensor_names);
void DeregisterGroup(int32_t group_id);
void DeregisterGroups(const std::vector<std::string>& tensor_names);

private:
7 changes: 7 additions & 0 deletions horovod/common/util.py
@@ -244,3 +244,10 @@ def num_rank_is_power_2(num_rank):
TODO support non-power of 2 ranks.
"""
return num_rank != 0 and ((num_rank & (num_rank -1)) == 0)

def split_list(l, n):
"""
Splits list l into n approximately even-sized chunks.
"""
d, r = divmod(len(l), n)
return [l[i * d + min(i, r):(i + 1) * d + min(i + 1, r)] for i in range(n)]
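
Illustration (not part of the diff): split_list distributes the remainder r one element at a time to the first r chunks, so chunk sizes differ by at most one. A self-contained check, with the helper copied from the hunk above:

def split_list(l, n):
    """
    Splits list l into n approximately even-sized chunks.
    """
    d, r = divmod(len(l), n)
    return [l[i * d + min(i, r):(i + 1) * d + min(i + 1, r)] for i in range(n)]

# 10 elements over 3 chunks: divmod(10, 3) gives d = 3, r = 1,
# so the first chunk receives one extra element.
assert split_list(list(range(10)), 3) == [[0, 1, 2, 3], [4, 5, 6], [7, 8, 9]]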
12 changes: 5 additions & 7 deletions horovod/mxnet/__init__.py
@@ -13,7 +13,7 @@
# limitations under the License.
# ==============================================================================

from horovod.common.util import check_extension
from horovod.common.util import check_extension, split_list

check_extension('horovod.mxnet', 'HOROVOD_WITH_MXNET',
__file__, 'mpi_lib')
@@ -60,9 +60,8 @@ def _do_allreduce(self, index, grad):

if isinstance(index, (tuple, list)):
if (self._num_groups > 0):
d, r = divmod(len(grad), self._num_groups)
grad_split = [grad[n * d + min(n, r):(n + 1) * d + min(n + 1, r)] for n in range(self._num_groups)]
index_split = [index[n * d + min(n, r):(n + 1) * d + min(n + 1, r)] for n in range(self._num_groups)]
grad_split = split_list(grad, self._num_groups)
index_split = split_list(index, self._num_groups)

for i, (grads, indices) in enumerate(zip(grad_split, index_split)):
grouped_allreduce_(tensors=grads, average=False, name="{}:{}".format(indices[0], indices[-1]), priority=-i,
@@ -157,9 +156,8 @@ def _allreduce_grads(self):
grads.append(param.list_grad()[0])
names.append(self._prefix + str(i))

d, r = divmod(len(grads), self._num_groups)
grads_split = [grads[n * d + min(n, r):(n + 1) * d + min(n + 1, r)] for n in range(self._num_groups)]
names_split = [names[n * d + min(n, r):(n + 1) * d + min(n + 1, r)] for n in range(self._num_groups)]
grads_split = split_list(grads, self._num_groups)
names_split = split_list(names, self._num_groups)

for i, (group_grads, group_names) in enumerate(zip(grads_split, names_split)):
# For better performance, enqueue groups in separate grouped_allreduce calls by dtype.
5 changes: 2 additions & 3 deletions horovod/tensorflow/__init__.py
@@ -19,7 +19,7 @@
import os
import warnings

from horovod.common.util import check_extension, gpu_available
from horovod.common.util import check_extension, gpu_available, split_list

check_extension('horovod.tensorflow', 'HOROVOD_WITH_TENSORFLOW', __file__, 'mpi_lib')

@@ -352,8 +352,7 @@ def allreduce_grads(grads):

if num_groups > 0:
grads_clean = [grad for grad in grads if grad is not None]
d, r = divmod(len(grads_clean), num_groups)
grads_split = [grads_clean[n * d + min(n, r):(n + 1) * d + min(n + 1, r)] for n in range(num_groups)]
grads_split = split_list(grads_clean, num_groups)

reduce_ops = []
for group in grads_split:
6 changes: 4 additions & 2 deletions horovod/torch/optimizer.py
@@ -21,6 +21,8 @@

import torch

from horovod.common.util import split_list

from horovod.torch.compression import Compression
from horovod.torch.functions import broadcast_object
from horovod.torch.mpi_ops import allreduce_async_, grouped_allreduce_async_
@@ -122,8 +124,8 @@ def _register_hooks(self):
p_list = sorted(p_list, key=lambda p : p_list_names.index(self._parameter_names.get(p)))

# Form groups
d, r = divmod(len(p_list), self._num_groups)
p_groups = [tuple(p_list[n * d + min(n, r):(n + 1) * d + min(n + 1, r)]) for n in range(self._num_groups)]
p_groups = split_list(p_list, self._num_groups)
p_groups = [tuple(p) for p in p_groups]
for group in p_groups:
for p in group:
self._p_to_group[p] = group
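
Illustration (not part of the diff): each group returned by split_list is converted to a tuple because tuples, unlike lists, are hashable, which lets every parameter be mapped to its containing group as in the _p_to_group assignment above. A minimal sketch with strings standing in for parameters (assumes a Horovod build that includes this commit, so split_list is importable):

from horovod.common.util import split_list

params = ["w0", "w1", "w2", "w3", "w4"]
p_groups = [tuple(g) for g in split_list(params, 2)]  # [("w0", "w1", "w2"), ("w3", "w4")]
p_to_group = {p: group for group in p_groups for p in group}
assert p_to_group["w3"] == ("w3", "w4")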
