
drop usage of deprecated distributed_backend (Lightning-AI#5009)
Co-authored-by: chaton <thomas@grid.ai>
Co-authored-by: Roger Shieh <sh.rog@protonmail.ch>
3 people committed Dec 9, 2020
1 parent 2c11d96 commit 53d7c95
Showing 34 changed files with 130 additions and 131 deletions.
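
The change is mechanical throughout the diff below: every `Trainer(distributed_backend=...)` argument and every `--distributed_backend` CLI flag becomes `accelerator`, keeping the same values ('dp', 'ddp', 'ddp2', 'ddp_spawn', 'ddp_cpu', 'horovod'). A minimal before/after sketch of the migration (illustrative values, not code taken from any single file in this commit):

    import pytorch_lightning as pl

    # before: the deprecated argument name
    trainer = pl.Trainer(gpus=2, distributed_backend='ddp')

    # after: the same configuration with the replacement argument this commit switches to
    trainer = pl.Trainer(gpus=2, accelerator='ddp')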
12 changes: 6 additions & 6 deletions benchmarks/test_sharded_parity.py
@@ -105,29 +105,29 @@ def test_ddp_string_sharded_plugin_correctness_amp_multi_gpu():
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 @pytest.mark.skipif(not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1',
                     reason="test should be run outside of pytest")
-@DDPLauncher.run("--distributed_backend ddp --gpus 2 --precision 32")
+@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 32")
 def test_ddp_sharded_plugin_correctness_multi_gpu_ddp(tmpdir, args=None):
     plugin_parity_test(
         gpus=args.gpus,
         precision=args.precision,
-        accelerator=args.distributed_backend,
+        accelerator=args.accelerator,
         plugin=DDPShardedPlugin(),
-        model_cls=SeedTrainLoaderModel
+        model_cls=SeedTrainLoaderModel,
     )


 @pytest.mark.skipif(not FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 @pytest.mark.skipif(not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1',
                     reason="test should be run outside of pytest")
-@DDPLauncher.run("--distributed_backend ddp --gpus 2 --precision 16")
+@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 16")
 def test_ddp_sharded_plugin_correctness_amp_multi_gpu_ddp(tmpdir, args=None):
     plugin_parity_test(
         gpus=args.gpus,
         precision=args.precision,
-        accelerator=args.distributed_backend,
+        accelerator=args.accelerator,
         plugin=DDPShardedPlugin(),
-        model_cls=SeedTrainLoaderModel
+        model_cls=SeedTrainLoaderModel,
     )

2 changes: 1 addition & 1 deletion pl_examples/domain_templates/imagenet.py
@@ -210,7 +210,7 @@ def main(args: Namespace) -> None:
     if args.seed is not None:
         pl.seed_everything(args.seed)

-    if args.distributed_backend == 'ddp':
+    if args.accelerator == 'ddp':
         # When using a single GPU per process and per
         # DistributedDataParallel, we need to divide the batch size
         # ourselves based on the total number of GPUs we have
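
The comment in the hunk above describes the manual per-process batch-size split that DDP needs when each process drives a single GPU. A rough sketch of that arithmetic, assuming hypothetical `args.batch_size`, `args.workers`, and an integer `args.gpus` (none of these assignments are shown in this hunk):

    # Sketch only: with one process per GPU, each process should load
    # batch_size / num_gpus samples so the total across processes matches the requested value.
    if args.accelerator == 'ddp':
        num_gpus = max(1, int(args.gpus))      # assumes gpus is passed as a count
        args.batch_size = int(args.batch_size / num_gpus)
        args.workers = int(args.workers / num_gpus)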
2 changes: 1 addition & 1 deletion pl_examples/domain_templates/reinforce_learn_Qnet.py
@@ -341,7 +341,7 @@ def main(args) -> None:

     trainer = pl.Trainer(
         gpus=1,
-        distributed_backend='dp',
+        accelerator='dp',
         val_check_interval=100
     )

2 changes: 1 addition & 1 deletion pl_examples/domain_templates/semantic_segmentation.py
@@ -214,7 +214,7 @@ def main(hparams: Namespace):
         logger=logger,
         max_epochs=hparams.epochs,
         accumulate_grad_batches=hparams.grad_batches,
-        distributed_backend=hparams.distributed_backend,
+        accelerator=hparams.accelerator,
         precision=16 if hparams.use_amp else 32,
     )

12 changes: 6 additions & 6 deletions pytorch_lightning/accelerators/accelerator_connector.py
@@ -87,7 +87,7 @@ def on_trainer_init(
         self.trainer.tpu_id = self.trainer.tpu_cores[0] if isinstance(self.trainer.tpu_cores, list) else None

         if num_processes != 1 and distributed_backend != "ddp_cpu":
-            rank_zero_warn("num_processes is only used for distributed_backend=\"ddp_cpu\". Ignoring it.")
+            rank_zero_warn("num_processes is only used for `accelerator='ddp_cpu'`. Ignoring it.")
         self.trainer.num_processes = num_processes

         # override with environment flag
@@ -276,7 +276,7 @@ def select_accelerator(self):
             accelerator_backend = accelerators.CPUAccelerator(self.trainer, cluster_env)
         else:
             raise MisconfigurationException(
-                f'Trainer(distributed_backend={self.trainer.distributed_backend} is not a supported backend'
+                f'Trainer(accelerator={self.trainer.distributed_backend} is not a supported backend'
             )

         return accelerator_backend
@@ -299,8 +299,8 @@ def set_distributed_mode(self):
         elif self.trainer.num_gpus > 1:
             rank_zero_warn(
                 'You requested multiple GPUs but did not specify a backend, e.g.'
-                ' Trainer(distributed_backend="dp"|"ddp"|"ddp2").'
-                ' Setting distributed_backend="ddp_spawn" for you.'
+                ' `Trainer(accelerator="dp"|"ddp"|"ddp2")`.'
+                ' Setting `accelerator="ddp_spawn"` for you.'
             )
             self.trainer.distributed_backend = "ddp_spawn"

@@ -342,7 +342,7 @@ def set_distributed_mode(self):
         if self.trainer.num_nodes > 1 and not (self.trainer.use_ddp2 or self.trainer.use_ddp):
             raise MisconfigurationException(
                 'DataParallel does not support num_nodes > 1. Switching to DistributedDataParallel for you. '
-                'To silence this warning set distributed_backend=ddp or distributed_backend=ddp2'
+                'To silence this warning set `accelerator="ddp"` or `accelerator="ddp2"`'
             )

         rank_zero_info(f'GPU available: {torch.cuda.is_available()}, used: {self.trainer.on_gpu}')
@@ -366,7 +366,7 @@ def check_horovod(self):
         """Raises a `MisconfigurationException` if the Trainer is not configured correctly for Horovod."""
         if not HOROVOD_AVAILABLE:
             raise MisconfigurationException(
-                'Requested `distributed_backend="horovod"`, but Horovod is not installed.'
+                'Requested `accelerator="horovod"`, but Horovod is not installed.'
                 'Install with \n $HOROVOD_WITH_PYTORCH=1 pip install horovod[pytorch]'
             )

2 changes: 1 addition & 1 deletion pytorch_lightning/accelerators/ddp_accelerator.py
@@ -174,7 +174,7 @@ def _check_can_spawn_children(self):
         if self._has_spawned_children:
             raise RuntimeError(
                 "You tried to run `.fit` or `.test` multiple times in the same script."
-                " This is not supported in DDP mode, switch to `distributed_backend='ddp_spawn'` instead."
+                " This is not supported in DDP mode, switch to `accelerator='ddp_spawn'` instead."
             )

     def set_world_ranks(self, process_idx):
6 changes: 3 additions & 3 deletions pytorch_lightning/trainer/data_loading.py
@@ -69,12 +69,12 @@ def _worker_check(self, dataloader: DataLoader, name: str) -> None:
         if dataloader.num_workers > 0 and using_spawn:
             rank_zero_warn('Dataloader(num_workers>0) and ddp_spawn do not mix well!'
                            ' Your performance might suffer dramatically.'
-                           ' Please consider setting distributed_backend=ddp to use num_workers > 0'
+                           ' Please consider setting accelerator=ddp to use num_workers > 0'
                            ' (this is a bottleneck of Python .spawn() and PyTorch')

         elif dataloader.num_workers == 0 and using_spawn:
-            rank_zero_warn('You are using `distributed_backend=ddp_spawn` with num_workers=0.'
-                           ' For much faster performance, switch to `distributed_backend=ddp`'
+            rank_zero_warn('You are using `accelerator=ddp_spawn` with num_workers=0.'
+                           ' For much faster performance, switch to `accelerator=ddp`'
                            ' and set `num_workers>0`')

         elif dataloader.num_workers <= 2 and multiprocessing.cpu_count() > 2 and not using_spawn:
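
Taken together, the two warnings above point to the same recommendation: prefer `accelerator='ddp'` with `num_workers > 0` over `ddp_spawn`. A small usage sketch of that combination (placeholder `dataset` and `model`, not code from this repository):

    from torch.utils.data import DataLoader
    from pytorch_lightning import Trainer

    # ddp relaunches the training script with one process per GPU, so DataLoader worker
    # processes do not hit the Python .spawn() bottleneck the warnings above describe.
    train_loader = DataLoader(dataset, batch_size=32, num_workers=4)
    trainer = Trainer(gpus=2, accelerator='ddp')
    trainer.fit(model, train_loader)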
2 changes: 1 addition & 1 deletion tests/backends/ddp_model.py
@@ -33,7 +33,7 @@ def main():
     parser.add_argument('--tmpdir')
     parser.add_argument('--workdir')
     parser.set_defaults(gpus=2)
-    parser.set_defaults(distributed_backend="ddp")
+    parser.set_defaults(accelerator="ddp")
     args = parser.parse_args()

     model = EvalModelTemplate()
40 changes: 20 additions & 20 deletions tests/backends/test_accelerator_connector.py
@@ -47,8 +47,8 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
-        callbacks=[CB()]
+        accelerator='ddp_cpu',
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -68,9 +68,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp',
+        accelerator='ddp',
         gpus=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -90,9 +90,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         gpus=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -120,9 +120,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp',
+        accelerator='ddp',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -152,9 +152,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp2',
+        accelerator='ddp2',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -181,9 +181,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp',
+        accelerator='ddp',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -210,9 +210,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp2',
+        accelerator='ddp2',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -239,9 +239,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -267,9 +267,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
@@ -304,9 +304,9 @@ def on_fit_start(self, trainer, pl_module):
     trainer = Trainer(
         plugins=[CustomCluster()],
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )

     with pytest.raises(SystemExit):
6 changes: 3 additions & 3 deletions tests/backends/test_ddp.py
@@ -22,7 +22,7 @@


 @pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
 ])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):
@@ -38,7 +38,7 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):


 @pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
 ])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):
@@ -54,7 +54,7 @@ def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):


 @pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
 ])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_fit_test(tmpdir, cli_args):
8 changes: 4 additions & 4 deletions tests/backends/test_ddp_spawn.py
@@ -34,7 +34,7 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
     )

     model = EvalModelTemplate()
@@ -51,8 +51,8 @@ def test_multi_gpu_model_ddp_spawn(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
-        progress_bar_refresh_rate=0
+        accelerator='ddp_spawn',
+        progress_bar_refresh_rate=0,
     )

     model = EvalModelTemplate()
@@ -79,7 +79,7 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
         limit_train_batches=0.2,
         limit_val_batches=0.2,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn'
+        accelerator='ddp_spawn',
     )
     result = trainer.fit(model, **fit_options)
     assert result == 1, "DDP doesn't work with dataloaders passed to fit()."
8 changes: 4 additions & 4 deletions tests/backends/test_dp.py
@@ -37,7 +37,7 @@ def test_multi_gpu_early_stop_dp(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='dp',
+        accelerator='dp',
     )

     model = EvalModelTemplate()
@@ -54,8 +54,8 @@ def test_multi_gpu_model_dp(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='dp',
-        progress_bar_refresh_rate=0
+        accelerator='dp',
+        progress_bar_refresh_rate=0,
     )

     model = EvalModelTemplate()
@@ -80,7 +80,7 @@ def test_dp_test(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='dp',
+        accelerator='dp',
     )
     trainer.fit(model)
     assert 'ckpt' in trainer.checkpoint_callback.best_model_path
2 changes: 1 addition & 1 deletion tests/core/test_datamodules.py
@@ -356,7 +356,7 @@ def test_full_loop_dp(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=3,
         weights_summary=None,
-        distributed_backend='dp',
+        accelerator='dp',
         gpus=2,
         deterministic=True,
     )
2 changes: 1 addition & 1 deletion tests/loggers/test_all.py
@@ -316,7 +316,7 @@ def _test_logger_created_on_rank_zero_only(tmpdir, logger_class):
     trainer = Trainer(
         logger=logger,
         default_root_dir=tmpdir,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=2,
         max_steps=1,
         checkpoint_callback=True,
2 changes: 1 addition & 1 deletion tests/models/data/horovod/train_default_model.py
@@ -79,7 +79,7 @@ def run_test_from_config(trainer_options):
     trainer.checkpoint_connector.hpc_load(ckpt_path, on_gpu=args.on_gpu)

     if args.on_gpu:
-        trainer = Trainer(gpus=1, distributed_backend='horovod', max_epochs=1)
+        trainer = Trainer(gpus=1, accelerator='horovod', max_epochs=1)
         # Test the root_gpu property
         assert trainer.root_gpu == hvd.local_rank()

