
drop usage of deprecated distributed_backend (Lightning-AI#5009)
Co-authored-by: chaton <thomas@grid.ai>
Co-authored-by: Roger Shieh <sh.rog@protonmail.ch>
3 people committed Dec 9, 2020
1 parent 2c11d96 commit 53d7c95
Showing 34 changed files with 130 additions and 131 deletions.
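
The change is mechanical throughout the diff below: every `Trainer(distributed_backend=...)` argument and every `--distributed_backend` CLI flag becomes `accelerator`, keeping the same values ('dp', 'ddp', 'ddp2', 'ddp_spawn', 'ddp_cpu', 'horovod'). A minimal before/after sketch of the migration (illustrative values, not code taken from any single file in this commit):

    import pytorch_lightning as pl

    # before: the deprecated argument name
    trainer = pl.Trainer(gpus=2, distributed_backend='ddp')

    # after: the same configuration with the replacement argument this commit switches to
    trainer = pl.Trainer(gpus=2, accelerator='ddp')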
12 changes: 6 additions & 6 deletions benchmarks/test_sharded_parity.py
@@ -105,29 +105,29 @@ def test_ddp_string_sharded_plugin_correctness_amp_multi_gpu():
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 @pytest.mark.skipif(not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1',
                     reason="test should be run outside of pytest")
-@DDPLauncher.run("--distributed_backend ddp --gpus 2 --precision 32")
+@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 32")
 def test_ddp_sharded_plugin_correctness_multi_gpu_ddp(tmpdir, args=None):
     plugin_parity_test(
         gpus=args.gpus,
         precision=args.precision,
-        accelerator=args.distributed_backend,
+        accelerator=args.accelerator,
         plugin=DDPShardedPlugin(),
-        model_cls=SeedTrainLoaderModel
+        model_cls=SeedTrainLoaderModel,
     )


 @pytest.mark.skipif(not FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 @pytest.mark.skipif(not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1',
                     reason="test should be run outside of pytest")
-@DDPLauncher.run("--distributed_backend ddp --gpus 2 --precision 16")
+@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 16")
 def test_ddp_sharded_plugin_correctness_amp_multi_gpu_ddp(tmpdir, args=None):
     plugin_parity_test(
         gpus=args.gpus,
         precision=args.precision,
-        accelerator=args.distributed_backend,
+        accelerator=args.accelerator,
         plugin=DDPShardedPlugin(),
-        model_cls=SeedTrainLoaderModel
+        model_cls=SeedTrainLoaderModel,
     )

2 changes: 1 addition & 1 deletion pl_examples/domain_templates/imagenet.py
@@ -210,7 +210,7 @@ def main(args: Namespace) -> None:
     if args.seed is not None:
         pl.seed_everything(args.seed)

-    if args.distributed_backend == 'ddp':
+    if args.accelerator == 'ddp':
         # When using a single GPU per process and per
         # DistributedDataParallel, we need to divide the batch size
         # ourselves based on the total number of GPUs we have
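
The comment in the hunk above describes the manual per-process batch-size split that DDP needs when each process drives a single GPU. A rough sketch of that arithmetic, assuming hypothetical `args.batch_size`, `args.workers`, and an integer `args.gpus` (none of these assignments are shown in this hunk):

    # Sketch only: with one process per GPU, each process should load
    # batch_size / num_gpus samples so the total across processes matches the requested value.
    if args.accelerator == 'ddp':
        num_gpus = max(1, int(args.gpus))      # assumes gpus is passed as a count
        args.batch_size = int(args.batch_size / num_gpus)
        args.workers = int(args.workers / num_gpus)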
2 changes: 1 addition & 1 deletion pl_examples/domain_templates/reinforce_learn_Qnet.py
@@ -341,7 +341,7 @@ def main(args) -> None:

     trainer = pl.Trainer(
         gpus=1,
-        distributed_backend='dp',
+        accelerator='dp',
         val_check_interval=100
     )

2 changes: 1 addition & 1 deletion pl_examples/domain_templates/semantic_segmentation.py
@@ -214,7 +214,7 @@ def main(hparams: Namespace):
         logger=logger,
         max_epochs=hparams.epochs,
         accumulate_grad_batches=hparams.grad_batches,
-        distributed_backend=hparams.distributed_backend,
+        accelerator=hparams.accelerator,
         precision=16 if hparams.use_amp else 32,
     )

12 changes: 6 additions & 6 deletions pytorch_lightning/accelerators/accelerator_connector.py
@@ -87,7 +87,7 @@ def on_trainer_init(
         self.trainer.tpu_id = self.trainer.tpu_cores[0] if isinstance(self.trainer.tpu_cores, list) else None

         if num_processes != 1 and distributed_backend != "ddp_cpu":
-            rank_zero_warn("num_processes is only used for distributed_backend=\"ddp_cpu\". Ignoring it.")
+            rank_zero_warn("num_processes is only used for `accelerator='ddp_cpu'`. Ignoring it.")
         self.trainer.num_processes = num_processes

         # override with environment flag
@@ -276,7 +276,7 @@ def select_accelerator(self):
             accelerator_backend = accelerators.CPUAccelerator(self.trainer, cluster_env)
         else:
             raise MisconfigurationException(
-                f'Trainer(distributed_backend={self.trainer.distributed_backend} is not a supported backend'
+                f'Trainer(accelerator={self.trainer.distributed_backend} is not a supported backend'
             )

         return accelerator_backend
@@ -299,8 +299,8 @@ def set_distributed_mode(self):
         elif self.trainer.num_gpus > 1:
             rank_zero_warn(
                 'You requested multiple GPUs but did not specify a backend, e.g.'
-                ' Trainer(distributed_backend="dp"|"ddp"|"ddp2").'
-                ' Setting distributed_backend="ddp_spawn" for you.'
+                ' `Trainer(accelerator="dp"|"ddp"|"ddp2")`.'
+                ' Setting `accelerator="ddp_spawn"` for you.'
             )
             self.trainer.distributed_backend = "ddp_spawn"

@@ -342,7 +342,7 @@ def set_distributed_mode(self):
         if self.trainer.num_nodes > 1 and not (self.trainer.use_ddp2 or self.trainer.use_ddp):
             raise MisconfigurationException(
                 'DataParallel does not support num_nodes > 1. Switching to DistributedDataParallel for you. '
-                'To silence this warning set distributed_backend=ddp or distributed_backend=ddp2'
+                'To silence this warning set `accelerator="ddp"` or `accelerator="ddp2"`'
             )

         rank_zero_info(f'GPU available: {torch.cuda.is_available()}, used: {self.trainer.on_gpu}')
@@ -366,7 +366,7 @@ def check_horovod(self):
         """Raises a `MisconfigurationException` if the Trainer is not configured correctly for Horovod."""
         if not HOROVOD_AVAILABLE:
             raise MisconfigurationException(
-                'Requested `distributed_backend="horovod"`, but Horovod is not installed.'
+                'Requested `accelerator="horovod"`, but Horovod is not installed.'
                 'Install with \n $HOROVOD_WITH_PYTORCH=1 pip install horovod[pytorch]'
             )

2 changes: 1 addition & 1 deletion pytorch_lightning/accelerators/ddp_accelerator.py
@@ -174,7 +174,7 @@ def _check_can_spawn_children(self):
         if self._has_spawned_children:
             raise RuntimeError(
                 "You tried to run `.fit` or `.test` multiple times in the same script."
-                " This is not supported in DDP mode, switch to `distributed_backend='ddp_spawn'` instead."
+                " This is not supported in DDP mode, switch to `accelerator='ddp_spawn'` instead."
             )

     def set_world_ranks(self, process_idx):
6 changes: 3 additions & 3 deletions pytorch_lightning/trainer/data_loading.py
@@ -69,12 +69,12 @@ def _worker_check(self, dataloader: DataLoader, name: str) -> None:
         if dataloader.num_workers > 0 and using_spawn:
             rank_zero_warn('Dataloader(num_workers>0) and ddp_spawn do not mix well!'
                            ' Your performance might suffer dramatically.'
-                           ' Please consider setting distributed_backend=ddp to use num_workers > 0'
+                           ' Please consider setting accelerator=ddp to use num_workers > 0'
                            ' (this is a bottleneck of Python .spawn() and PyTorch')

         elif dataloader.num_workers == 0 and using_spawn:
-            rank_zero_warn('You are using `distributed_backend=ddp_spawn` with num_workers=0.'
-                           ' For much faster performance, switch to `distributed_backend=ddp`'
+            rank_zero_warn('You are using `accelerator=ddp_spawn` with num_workers=0.'
+                           ' For much faster performance, switch to `accelerator=ddp`'
                            ' and set `num_workers>0`')

         elif dataloader.num_workers <= 2 and multiprocessing.cpu_count() > 2 and not using_spawn:
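
Taken together, the two warnings above point to the same recommendation: prefer `accelerator='ddp'` with `num_workers > 0` over `ddp_spawn`. A small usage sketch of that combination (placeholder `dataset` and `model`, not code from this repository):

    from torch.utils.data import DataLoader
    from pytorch_lightning import Trainer

    # ddp relaunches the training script with one process per GPU, so DataLoader worker
    # processes do not hit the Python .spawn() bottleneck the warnings above describe.
    train_loader = DataLoader(dataset, batch_size=32, num_workers=4)
    trainer = Trainer(gpus=2, accelerator='ddp')
    trainer.fit(model, train_loader)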
2 changes: 1 addition & 1 deletion tests/backends/ddp_model.py
@@ -33,7 +33,7 @@ def main():
     parser.add_argument('--tmpdir')
     parser.add_argument('--workdir')
     parser.set_defaults(gpus=2)
-    parser.set_defaults(distributed_backend="ddp")
+    parser.set_defaults(accelerator="ddp")
     args = parser.parse_args()

     model = EvalModelTemplate()
40 changes: 20 additions & 20 deletions tests/backends/test_accelerator_connector.py
@@ -47,8 +47,8 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
-        callbacks=[CB()]
+        accelerator='ddp_cpu',
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -68,9 +68,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp',
+        accelerator='ddp',
         gpus=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -90,9 +90,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         gpus=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -120,9 +120,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp',
+        accelerator='ddp',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -152,9 +152,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp2',
+        accelerator='ddp2',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -181,9 +181,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp',
+        accelerator='ddp',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -210,9 +210,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp2',
+        accelerator='ddp2',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -239,9 +239,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -267,9 +267,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -304,9 +304,9 @@ def on_fit_start(self, trainer, pl_module):
     trainer = Trainer(
         plugins=[CustomCluster()],
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
6 changes: 3 additions & 3 deletions tests/backends/test_ddp.py
@@ -22,7 +22,7 @@


 @pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
 ])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):
@@ -38,7 +38,7 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):


 @pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
 ])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):
@@ -54,7 +54,7 @@ def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):


 @pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
 ])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_fit_test(tmpdir, cli_args):
8 changes: 4 additions & 4 deletions tests/backends/test_ddp_spawn.py
@@ -34,7 +34,7 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
     )

     model = EvalModelTemplate()
@@ -51,8 +51,8 @@ def test_multi_gpu_model_ddp_spawn(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
-        progress_bar_refresh_rate=0
+        accelerator='ddp_spawn',
+        progress_bar_refresh_rate=0,
     )

     model = EvalModelTemplate()
@@ -79,7 +79,7 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
         limit_train_batches=0.2,
         limit_val_batches=0.2,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn'
+        accelerator='ddp_spawn',
     )
     result = trainer.fit(model, **fit_options)
     assert result == 1, "DDP doesn't work with dataloaders passed to fit()."
8 changes: 4 additions & 4 deletions tests/backends/test_dp.py
@@ -37,7 +37,7 @@ def test_multi_gpu_early_stop_dp(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='dp',
+        accelerator='dp',
     )

     model = EvalModelTemplate()
@@ -54,8 +54,8 @@ def test_multi_gpu_model_dp(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='dp',
-        progress_bar_refresh_rate=0
+        accelerator='dp',
+        progress_bar_refresh_rate=0,
     )

     model = EvalModelTemplate()
@@ -80,7 +80,7 @@ def test_dp_test(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='dp',
+        accelerator='dp',
     )
     trainer.fit(model)
     assert 'ckpt' in trainer.checkpoint_callback.best_model_path
2 changes: 1 addition & 1 deletion tests/core/test_datamodules.py
@@ -356,7 +356,7 @@ def test_full_loop_dp(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=3,
         weights_summary=None,
-        distributed_backend='dp',
+        accelerator='dp',
         gpus=2,
         deterministic=True,
     )
2 changes: 1 addition & 1 deletion tests/loggers/test_all.py
@@ -316,7 +316,7 @@ def _test_logger_created_on_rank_zero_only(tmpdir, logger_class):
     trainer = Trainer(
         logger=logger,
         default_root_dir=tmpdir,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=2,
         max_steps=1,
         checkpoint_callback=True,
2 changes: 1 addition & 1 deletion tests/models/data/horovod/train_default_model.py
@@ -79,7 +79,7 @@ def run_test_from_config(trainer_options):
     trainer.checkpoint_connector.hpc_load(ckpt_path, on_gpu=args.on_gpu)

     if args.on_gpu:
-        trainer = Trainer(gpus=1, distributed_backend='horovod', max_epochs=1)
+        trainer = Trainer(gpus=1, accelerator='horovod', max_epochs=1)
         # Test the root_gpu property
         assert trainer.root_gpu == hvd.local_rank()

