Update comments, doc strings, changelog
Signed-off-by: Max H. Gerlach <git@maxgerlach.de>
maxhgerlach committed Oct 15, 2021
1 parent 206ebf8 commit dd942ff
Showing 4 changed files with 18 additions and 16 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -8,6 +8,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Added

+- TensorFlow: Added in-place broadcasting of variables. ([#3128](https://github.com/horovod/horovod/pull/3128))
+
### Changed

### Deprecated
9 changes: 5 additions & 4 deletions horovod/tensorflow/functions.py
@@ -68,10 +68,11 @@ def broadcast_variables(variables, root_rank, process_set=global_process_set, in
Broadcasts variables from root rank to all other processes
in a process set (defaults to all Horovod processes).
-Optionally, the broadcast may be performed in-place. This is only supported with
-TensorFlow 2.6 or later. Reference variables (legacy support in TF 2) must all
-be of the same data type. There is no such restriction for resource variables
-(default in TF 2).
+Optionally, the broadcast may be performed in-place, which avoids
+temporary memory allocations and fragmentation. This is only
+supported with TensorFlow 2.6 or later. Reference variables
+(legacy support in TF 2) must all be of the same data type. There
+is no such restriction for resource variables (default in TF 2).
Arguments:
variables: variables for broadcast
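For context, here is a minimal usage sketch of the in-place option documented above. It assumes the `inplace` keyword argument from this PR's `broadcast_variables` signature and guards on the TF 2.6 requirement the same way the tests below do; the Keras model is purely illustrative.

```python
from distutils.version import LooseVersion

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()

# Purely illustrative model; any list of tf.Variable objects works.
model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
model.build(input_shape=(None, 4))

# In-place broadcasting avoids temporary allocations but needs TF 2.6+.
use_inplace = LooseVersion(tf.__version__) >= LooseVersion("2.6.0")
hvd.broadcast_variables(model.variables, root_rank=0, inplace=use_inplace)
```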
19 changes: 11 additions & 8 deletions horovod/tensorflow/mpi_ops.cc
@@ -890,10 +890,6 @@ class HorovodBroadcastInplaceOp : public OpKernel {
variable_locks.reserve(num_variables_);

for (int tensor_index = 0; tensor_index < num_variables_; ++tensor_index) {
-// Custom ops with resource variables are broken for at least TF 2.3, 2.4,
-// and 2.5. A fix has been introduced to TF 2.6 via this PR:
-// https://github.com/tensorflow/tensorflow/pull/47072
-
DataType dtype;
OP_REQUIRES_OK(
context, GetInputDataTypeFromVariable(context, tensor_index, dtype));
@@ -947,7 +943,9 @@ class HorovodBroadcastInplaceOp : public OpKernel {
any_failures_and_tensors_done) {
const bool do_lock = true;
const bool sparse = false;
-// MaybeLockVariableInputMutexesInOrder() seems to be broken at the moment
+// Here we need to replicate the functionality provided by
+// MaybeLockVariableInputMutexesInOrder(). That function currently does
+// not work as intended for input_ids not starting at 0. See:
// https://github.com/tensorflow/tensorflow/issues/51686
{
Var* var;
@@ -976,9 +974,10 @@ class HorovodBroadcastInplaceOp : public OpKernel {
std::string var_name = variable_names_[tensor_index];
if (context->input_dtype(tensor_index) == DT_RESOURCE && var_name.empty()) {
const ResourceHandle& handle = HandleFromInput(context, tensor_index);
-// handle.name() seems to be something like _AnonymousVar18. The Python
-// name attribute of the variable is apparently not passed through
-// automatically. So we use this only if we do not have a proper name.
+// We use handle.name() as a fallback only when we do not have a proper
+// name because typically it seems to be something like _AnonymousVar18.
+// The Python name attribute of the variable does not appear to be passed
+// through automatically.
var_name = handle.name();
}

@@ -1047,6 +1046,8 @@ processes that do a broadcast on variables with the same names must have the
same dimensions for those variables. All variables must be located on the same
device and they must be of the same data type.
+This requires TensorFlow 2.6+.
Arguments
root_rank: Rank that will send data, other ranks will receive data.
variable_names: Names associated to the variables (obtained via Python
@@ -1082,6 +1083,8 @@ processes that do a broadcast on variables with the same names must have the
same dimensions for those variables. All variables must be located on the same
device.
+This requires TensorFlow 2.6+.
Arguments
root_rank: Rank that will send data, other ranks will receive data.
variable_names: Names associated to the variables (obtained via Python
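Since the rewritten comment above explains that `handle.name()` serves only as a fallback, a rough Python-side sketch of why explicit `variable_names` are needed may help; the exact wiring inside Horovod's wrapper may differ.

```python
import tensorflow as tf

# Named resource variables expose the Python-side name via the `name`
# attribute, e.g. "w:0" and "b:0".
w = tf.Variable(tf.zeros([3]), name="w")
b = tf.Variable(tf.zeros([2, 2]), name="b")
variable_names = [v.name for v in (w, b)]
print(variable_names)  # ['w:0', 'b:0']

# An anonymous variable's handle, by contrast, typically reports something
# like "_AnonymousVar18", which is why the kernel treats handle.name() only
# as a fallback when no proper name was passed in.
```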
4 changes: 0 additions & 4 deletions test/parallel/test_tensorflow.py
@@ -1,4 +1,3 @@

# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
# Modifications copyright (C) 2018 Uber Technologies, Inc.
# Modifications copyright (C) 2019 Intel Corporation
@@ -2187,7 +2186,6 @@ def test_horovod_broadcast_cpu(self):
tf.cast(root_tensor, tf.int32), tf.cast(broadcasted_tensor, tf.int32)))),
"hvd.broadcast produces incorrect broadcasted tensor")


def test_horovod_broadcast_gpu(self):
"""Test that the broadcast correctly broadcasts 1D, 2D, 3D tensors on GPU."""
# Only do this test if there are GPUs available.
@@ -2227,7 +2225,6 @@ def test_horovod_broadcast_gpu(self):
tf.cast(root_tensor, tf.int32), tf.cast(broadcasted_tensor, tf.int32)))),
"hvd.broadcast produces incorrect broadcasted tensor")


def test_horovod_broadcast_inplace_cpu(self):
"""Test that the inplace broadcast correctly broadcasts 1D, 2D, 3D variables on CPU."""
if LooseVersion(tf.__version__) < LooseVersion('2.6.0'):
@@ -2278,7 +2275,6 @@ def test_horovod_broadcast_inplace_cpu(self):
tf.cast(root_tensor, tf.int32), tf.cast(broadcasted_tensor, tf.int32)))),
"Inplace hvd.broadcast_ produces incorrect broadcasted variable value")


def test_horovod_broadcast_inplace_gpu(self):
"""Test that the inplace broadcast correctly broadcasts 1D, 2D, 3D variables on GPU."""
if LooseVersion(tf.__version__) < LooseVersion('2.6.0'):
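The guarded tests above all follow the same pattern: initialize per-rank values, broadcast in place, and check that every rank ends up with root's values. A condensed sketch of that pattern (not the actual test code) under the same TF 2.6 guard:

```python
from distutils.version import LooseVersion

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
if LooseVersion(tf.__version__) >= LooseVersion("2.6.0"):
    # Each rank starts with its own rank as the variable's value.
    var = tf.Variable(tf.fill([4], float(hvd.rank())))
    hvd.broadcast_variables([var], root_rank=0, inplace=True)
    # After the broadcast, every rank should hold rank 0's values.
    assert tf.reduce_all(var == 0.0), "inplace broadcast gave wrong values"
```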
