Update comments, doc strings, changelog
Signed-off-by: Max H. Gerlach <git@maxgerlach.de>
maxhgerlach committed Oct 15, 2021
1 parent 206ebf8 commit dd942ff
Showing 4 changed files with 18 additions and 16 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -8,6 +8,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Added

+- TensorFlow: Added in-place broadcasting of variables. ([#3128](https://github.com/horovod/horovod/pull/3128))
+
### Changed

### Deprecated
9 changes: 5 additions & 4 deletions horovod/tensorflow/functions.py
@@ -68,10 +68,11 @@ def broadcast_variables(variables, root_rank, process_set=global_process_set, in
Broadcasts variables from root rank to all other processes
in a process set (defaults to all Horovod processes).
-Optionally, the broadcast may be performed in-place. This is only supported with
-TensorFlow 2.6 or later. Reference variables (legacy support in TF 2) must all
-be of the same data type. There is no such restriction for resource variables
-(default in TF 2).
+Optionally, the broadcast may be performed in-place, which avoids
+temporary memory allocations and fragmentation. This is only
+supported with TensorFlow 2.6 or later. Reference variables
+(legacy support in TF 2) must all be of the same data type. There
+is no such restriction for resource variables (default in TF 2).
Arguments:
variables: variables for broadcast
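For context, here is a minimal usage sketch of the in-place option documented above. It assumes the `inplace` keyword argument from this PR's `broadcast_variables` signature and guards on the TF 2.6 requirement the same way the tests below do; the Keras model is purely illustrative.

```python
from distutils.version import LooseVersion

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()

# Purely illustrative model; any list of tf.Variable objects works.
model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
model.build(input_shape=(None, 4))

# In-place broadcasting avoids temporary allocations but needs TF 2.6+.
use_inplace = LooseVersion(tf.__version__) >= LooseVersion("2.6.0")
hvd.broadcast_variables(model.variables, root_rank=0, inplace=use_inplace)
```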
19 changes: 11 additions & 8 deletions horovod/tensorflow/mpi_ops.cc
@@ -890,10 +890,6 @@ class HorovodBroadcastInplaceOp : public OpKernel {
variable_locks.reserve(num_variables_);

for (int tensor_index = 0; tensor_index < num_variables_; ++tensor_index) {
-// Custom ops with resource variables are broken for at least TF 2.3, 2.4,
-// and 2.5. A fix has been introduced to TF 2.6 via this PR:
-// https://github.com/tensorflow/tensorflow/pull/47072
-
DataType dtype;
OP_REQUIRES_OK(
context, GetInputDataTypeFromVariable(context, tensor_index, dtype));
@@ -947,7 +943,9 @@ class HorovodBroadcastInplaceOp : public OpKernel {
any_failures_and_tensors_done) {
const bool do_lock = true;
const bool sparse = false;
-// MaybeLockVariableInputMutexesInOrder() seems to be broken at the moment
+// Here we need to replicate the functionality provided by
+// MaybeLockVariableInputMutexesInOrder(). That function currently does
+// not work as intended for input_ids not starting at 0. See:
// https://github.com/tensorflow/tensorflow/issues/51686
{
Var* var;
@@ -976,9 +974,10 @@ class HorovodBroadcastInplaceOp : public OpKernel {
std::string var_name = variable_names_[tensor_index];
if (context->input_dtype(tensor_index) == DT_RESOURCE && var_name.empty()) {
const ResourceHandle& handle = HandleFromInput(context, tensor_index);
-// handle.name() seems to be something like _AnonymousVar18. The Python
-// name attribute of the variable is apparently not passed through
-// automatically. So we use this only if we do not have a proper name.
+// We use handle.name() as a fallback only when we do not have a proper
+// name because typically it seems to be something like _AnonymousVar18.
+// The Python name attribute of the variable does not appear to be passed
+// through automatically.
var_name = handle.name();
}

@@ -1047,6 +1046,8 @@ processes that do a broadcast on variables with the same names must have the
same dimensions for those variables. All variables must be located on the same
device and they must be of the same data type.
+This requires TensorFlow 2.6+.
Arguments
root_rank: Rank that will send data, other ranks will receive data.
variable_names: Names associated to the variables (obtained via Python
@@ -1082,6 +1083,8 @@ processes that do a broadcast on variables with the same names must have the
same dimensions for those variables. All variables must be located on the same
device.
+This requires TensorFlow 2.6+.
Arguments
root_rank: Rank that will send data, other ranks will receive data.
variable_names: Names associated to the variables (obtained via Python
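Since the rewritten comment above explains that `handle.name()` serves only as a fallback, a rough Python-side sketch of why explicit `variable_names` are needed may help; the exact wiring inside Horovod's wrapper may differ.

```python
import tensorflow as tf

# Named resource variables expose the Python-side name via the `name`
# attribute, e.g. "w:0" and "b:0".
w = tf.Variable(tf.zeros([3]), name="w")
b = tf.Variable(tf.zeros([2, 2]), name="b")
variable_names = [v.name for v in (w, b)]
print(variable_names)  # ['w:0', 'b:0']

# An anonymous variable's handle, by contrast, typically reports something
# like "_AnonymousVar18", which is why the kernel treats handle.name() only
# as a fallback when no proper name was passed in.
```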
4 changes: 0 additions & 4 deletions test/parallel/test_tensorflow.py
@@ -1,4 +1,3 @@

# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
# Modifications copyright (C) 2018 Uber Technologies, Inc.
# Modifications copyright (C) 2019 Intel Corporation
@@ -2187,7 +2186,6 @@ def test_horovod_broadcast_cpu(self):
tf.cast(root_tensor, tf.int32), tf.cast(broadcasted_tensor, tf.int32)))),
"hvd.broadcast produces incorrect broadcasted tensor")


def test_horovod_broadcast_gpu(self):
"""Test that the broadcast correctly broadcasts 1D, 2D, 3D tensors on GPU."""
# Only do this test if there are GPUs available.
@@ -2227,7 +2225,6 @@ def test_horovod_broadcast_gpu(self):
tf.cast(root_tensor, tf.int32), tf.cast(broadcasted_tensor, tf.int32)))),
"hvd.broadcast produces incorrect broadcasted tensor")


def test_horovod_broadcast_inplace_cpu(self):
"""Test that the inplace broadcast correctly broadcasts 1D, 2D, 3D variables on CPU."""
if LooseVersion(tf.__version__) < LooseVersion('2.6.0'):
@@ -2278,7 +2275,6 @@ def test_horovod_broadcast_inplace_cpu(self):
tf.cast(root_tensor, tf.int32), tf.cast(broadcasted_tensor, tf.int32)))),
"Inplace hvd.broadcast_ produces incorrect broadcasted variable value")


def test_horovod_broadcast_inplace_gpu(self):
"""Test that the inplace broadcast correctly broadcasts 1D, 2D, 3D variables on GPU."""
if LooseVersion(tf.__version__) < LooseVersion('2.6.0'):
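The guarded tests above all follow the same pattern: initialize per-rank values, broadcast in place, and check that every rank ends up with root's values. A condensed sketch of that pattern (not the actual test code) under the same TF 2.6 guard:

```python
from distutils.version import LooseVersion

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
if LooseVersion(tf.__version__) >= LooseVersion("2.6.0"):
    # Each rank starts with its own rank as the variable's value.
    var = tf.Variable(tf.fill([4], float(hvd.rank())))
    hvd.broadcast_variables([var], root_rank=0, inplace=True)
    # After the broadcast, every rank should hold rank 0's values.
    assert tf.reduce_all(var == 0.0), "inplace broadcast gave wrong values"
```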
