Skip to content

Commit

Permalink
Remove the TensorBoard callback from other (non-zero local rank) processes on the same machine
Browse files Browse the repository at this point in the history
Signed-off-by: Rich Porter <rich.porter@uber.com>
  • Loading branch information
porterrf committed Sep 7, 2021
1 parent cc8d448 commit 38ff578
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 3 deletions.
3 changes: 2 additions & 1 deletion examples/spark/keras/keras_spark_mnist.py
Expand Up @@ -115,7 +115,8 @@
batch_size=args.batch_size,
epochs=args.epochs,
inmemory_cache_all=True,
verbose=1)
verbose=1,
callbacks=[keras.callbacks.TensorBoard(profile_batch=5)])

keras_model = keras_estimator.fit(train_df).setOutputCols(['label_prob'])

Expand Down
10 changes: 8 additions & 2 deletions horovod/spark/keras/remote.py
Expand Up @@ -151,6 +151,11 @@ def train(serialized_model, train_rows, val_rows, avg_row_size):
# TensorBoard, or other metrics-based callbacks.
hvd.callbacks.MetricAverageCallback(),
]

if hvd.local_rank() != 0:
# The TensorBoard callback appears to conflict across processes on the same machine, so only local rank 0 keeps it
user_callbacks[:] = [c for c in user_callbacks if not isinstance(c, k.callbacks.TensorBoard)]

callbacks += user_callbacks

# Horovod: save checkpoints only on the first worker to prevent other workers from
Expand Down Expand Up @@ -180,12 +185,13 @@ def train(serialized_model, train_rows, val_rows, avg_row_size):
for i, c in enumerate(callbacks):
if isinstance(c, k.callbacks.TensorBoard):
tb_callback = c
print(f"Found TensorBoard callback, updating log_dir to {logs_dir}")
tb_callback.log_dir = logs_dir
break
if tb_callback:
# Rather than a possibly arbitrary order, we always have the TensorBoard
# Rather than a possibly arbitrary order, we always place the TensorBoard
# callback right before the SyncCallback
callbacks.remove(i)
callbacks.pop(i)
callbacks.append(tb_callback or k.callbacks.TensorBoard(logs_dir))
callbacks.append(SyncCallback(run_output_dir, remote_store.sync, k))

Expand Down

0 comments on commit 38ff578

Please sign in to comment.