Skip to content

Commit

Permalink
Merge pull request #387 from allenai/epwalsh/dist-init
Browse files · Browse the repository at this point in the history
initialize distributed group before logging
(branch information)
epwalsh committed Dec 1, 2023
2 parents e30d29f + 7f54682 commit d1c185b
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions scripts/train.py
Original file line number · Diff line number · Diff line change
Expand Up @@ -46,9 +46,9 @@ def main(cfg: TrainConfig) -> None:
"setting has no effect."
)

# Initialize process group and set device.
dist.init_process_group(backend="nccl")
barrier()

# Set CUDA device.
torch.cuda.set_device(f"cuda:{get_local_rank()}")
device = torch.device("cuda")

Expand Down Expand Up @@ -239,6 +239,9 @@ def dummy_init_fn(module: torch.nn.Module) -> None:


if __name__ == "__main__":
# Initialize process group.
dist.init_process_group(backend="nccl")

prepare_cli_environment()

try:
Expand Down

0 comments on commit d1c185b

Please sign in to comment.