From 825bb8cdfa953935eaf2d63a1c1e2bfe7d2bee70 Mon Sep 17 00:00:00 2001 From: chongxiaoc <74630762+chongxiaoc@users.noreply.github.com> Date: Sat, 23 Oct 2021 17:30:21 -0700 Subject: [PATCH] Lightning: set limit_train_batches and limit_val_batches (#3237) Tell the Lightning trainer how many batches a single epoch needs. Signed-off-by: Chongxiao Cao --- horovod/spark/lightning/remote.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/horovod/spark/lightning/remote.py b/horovod/spark/lightning/remote.py index f4f180955c..2c083810c8 100644 --- a/horovod/spark/lightning/remote.py +++ b/horovod/spark/lightning/remote.py @@ -193,6 +193,8 @@ def on_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") - 'gpus': _num_gpus, 'callbacks': callbacks, 'max_epochs': epochs, + 'limit_train_batches': _train_steps_per_epoch, + 'limit_val_batches': _val_steps_per_epoch, 'logger': train_logger, 'log_every_n_steps': log_every_n_steps, 'num_sanity_val_steps': 0,