examples/legacy/seq2seq/seq2seq_trainer.py (2 changes: 1 addition & 1 deletion)
@@ -144,7 +144,7 @@ def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:

return (
RandomSampler(self.train_dataset)
- if self.args.local_rank == -1
+ if self.args.local_process_index == -1
else DistributedSampler(self.train_dataset)
)

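For context on the hunk above: the sampler choice is the standard single- vs multi-process split. A minimal standalone sketch of the same pattern (the `is_distributed` flag stands in for the trainer's rank check, which is not reproduced here):

```python
import torch
from torch.utils.data import DataLoader, RandomSampler, TensorDataset
from torch.utils.data.distributed import DistributedSampler


def build_train_sampler(dataset, is_distributed: bool):
    # Single process: plain shuffling. Multiple processes: DistributedSampler,
    # so every rank iterates over a disjoint shard of the dataset.
    return DistributedSampler(dataset) if is_distributed else RandomSampler(dataset)


dataset = TensorDataset(torch.randn(8, 2))
loader = DataLoader(dataset, sampler=build_train_sampler(dataset, is_distributed=False))
```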
src/transformers/training_args.py (6 changes: 6 additions & 0 deletions)
@@ -996,6 +996,12 @@ class TrainingArguments:
)
},
)
+ local_rank: int = field(
+     default=-1,
+     metadata={
+         "help": "When using torch.distributed.launch (deprecated), it will pass `local_rank` in the script, so we need this for the parser. To get the local rank, prefer the `local_process_index` property."
+     },
+ )
ddp_backend: Optional[str] = field(
default=None,
metadata={
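A hedged sketch of how this field is meant to be consumed after the change: `local_rank` stays only so `--local_rank` from the deprecated `torch.distributed.launch` still parses, while code reads the `local_process_index` property (the single-process values noted in the comment are an assumption, not something this diff guarantees):

```python
from transformers import TrainingArguments

args = TrainingArguments(output_dir="out")
# Raw parser field vs. the preferred property; in a plain single-process run
# we'd expect local_rank == -1 and local_process_index == 0 (assumption).
print(args.local_rank, args.local_process_index)
```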
tests/deepspeed/test_deepspeed.py (15 changes: 5 additions & 10 deletions)
@@ -518,7 +518,6 @@ def test_hf_ds_config_mismatch(self):

with mockenv_context(**self.dist_env_1_gpu):
trainer = get_regression_trainer(
- local_rank=0,
fp16=fp16,
deepspeed=ds_config,
per_device_train_batch_size=per_device_train_batch_size,
@@ -552,7 +551,7 @@ def test_hf_scheduler_hf_optimizer(self):
ds_config_zero2_dict["zero_optimization"]["offload_optimizer"]["device"] = "none"
ds_config_zero2_dict["fp16"]["initial_scale_power"] = 1 # force optimizer on the first step
trainer = get_regression_trainer(
- a=a, local_rank=0, fp16=True, deepspeed=ds_config_zero2_dict, output_dir=self.get_auto_remove_tmp_dir()
+ a=a, fp16=True, deepspeed=ds_config_zero2_dict, output_dir=self.get_auto_remove_tmp_dir()
)
trainer.train()
new_a = trainer.model.a.item()
@@ -566,7 +565,7 @@ def test_ds_scheduler_hf_optimizer(self):
ds_config_zero2_dict["zero_optimization"]["offload_optimizer"]["device"] = "none"
ds_config_zero2_dict["fp16"]["initial_scale_power"] = 1 # force optimizer on the first step
trainer = get_regression_trainer(
- a=a, local_rank=0, fp16=True, deepspeed=ds_config_zero2_dict, output_dir=self.get_auto_remove_tmp_dir()
+ a=a, fp16=True, deepspeed=ds_config_zero2_dict, output_dir=self.get_auto_remove_tmp_dir()
)
trainer.train()
new_a = trainer.model.a.item()
@@ -580,7 +579,7 @@ def test_hf_scheduler_ds_optimizer(self):
ds_config_zero2_dict["zero_optimization"]["offload_optimizer"]["device"] = "none"
ds_config_zero2_dict["fp16"]["initial_scale_power"] = 1 # force optimizer on the first step
trainer = get_regression_trainer(
- a=a, local_rank=0, fp16=True, deepspeed=ds_config_zero2_dict, output_dir=self.get_auto_remove_tmp_dir()
+ a=a, fp16=True, deepspeed=ds_config_zero2_dict, output_dir=self.get_auto_remove_tmp_dir()
)
trainer.train()
new_a = trainer.model.a.item()
@@ -598,7 +597,7 @@ def test_stage3_nvme_offload(self):
ds_config_zero3_dict["zero_optimization"]["offload_param"] = nvme_config
ds_config_zero3_dict["zero_optimization"]["stage3_gather_16bit_weights_on_model_save"] = True
trainer = get_regression_trainer(
- local_rank=0, fp16=True, deepspeed=ds_config_zero3_dict, output_dir=self.get_auto_remove_tmp_dir()
+ fp16=True, deepspeed=ds_config_zero3_dict, output_dir=self.get_auto_remove_tmp_dir()
)
with CaptureLogger(deepspeed_logger) as cl:
trainer.train()
@@ -616,7 +615,6 @@ def model_init():
return model

trainer = get_regression_trainer(
- local_rank=0,
fp16=True,
model_init=model_init,
deepspeed=ds_config_zero3_dict,
@@ -642,7 +640,7 @@ def test_hf_optimizer_with_offload(self, stage, dtype):
ds_config_dict["zero_optimization"]["offload_optimizer"]["device"] = "cpu"
ds_config_dict["zero_force_ds_cpu_optimizer"] = False # offload is not efficient w/o CPUAdam
with mockenv_context(**self.dist_env_1_gpu):
kwargs = {"local_rank": 0, "deepspeed": ds_config_dict, "output_dir": self.get_auto_remove_tmp_dir()}
kwargs = {"deepspeed": ds_config_dict, "output_dir": self.get_auto_remove_tmp_dir()}
kwargs[dtype] = True
trainer = get_regression_trainer(**kwargs)
with CaptureLogger(deepspeed_logger) as cl:
@@ -659,7 +657,6 @@ def test_fake_notebook_no_launcher(self, stage, dtype):
# to reset `deepspeed_logger.handlers[0].setStream(sys.stdout)` or directly capture from the deepspeed_logger.
with mockenv_context(**self.dist_env_1_gpu):
kwargs = {
"local_rank": 0,
"deepspeed": self.get_config_dict(stage),
"output_dir": self.get_auto_remove_tmp_dir(),
}
@@ -683,7 +680,6 @@ def test_early_get_last_lr(self, stage, dtype):
kwargs = {
"a": a,
"b": b,
"local_rank": 0,
"train_len": 8,
"deepspeed": self.get_config_dict(stage),
"per_device_train_batch_size": 8,
@@ -729,7 +725,6 @@ def test_gradient_accumulation(self, stage, dtype):
kwargs = {
"a": a,
"b": b,
"local_rank": 0,
"train_len": train_len,
"deepspeed": self.get_config_dict(stage),
"output_dir": self.get_auto_remove_tmp_dir(),
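The deepspeed test edits all follow one pattern: `local_rank=0` is no longer passed explicitly because the rank is picked up from the mocked distributed environment. A sketch of that pattern with `unittest.mock` (env var names follow the `torch.distributed` convention; the exact dict behind `self.dist_env_1_gpu` may differ):

```python
import os
from unittest import mock

# Typical single-GPU distributed env, as torch.distributed launchers set it.
dist_env_1_gpu = {
    "MASTER_ADDR": "localhost",
    "MASTER_PORT": "10999",
    "RANK": "0",
    "LOCAL_RANK": "0",
    "WORLD_SIZE": "1",
}

with mock.patch.dict(os.environ, dist_env_1_gpu):
    # Build TrainingArguments / the trainer here; the local rank now comes
    # from LOCAL_RANK instead of an explicit local_rank=0 argument.
    pass
```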
tests/trainer/test_trainer.py (8 changes: 1 addition & 7 deletions)
@@ -1437,9 +1437,7 @@ def test_training_arguments_are_left_untouched(self):
args = TrainingArguments(tmp_dir, report_to=[])
dict1, dict2 = args.to_dict(), trainer.args.to_dict()
for key in dict1:
- # Logging dir can be slightly different as they default to something with the time.
- if key != "logging_dir":
-     self.assertEqual(dict1[key], dict2[key])
+ self.assertEqual(dict1[key], dict2[key])

def test_number_of_steps_in_training(self):
# Regular training has n_epochs * len(train_dl) steps
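The dropped special case above implies the default `logging_dir` is no longer time-stamped, so two `TrainingArguments` built back to back should agree on every key. A hedged check of that reading (an assumption drawn from this hunk, not stated elsewhere in the diff):

```python
from transformers import TrainingArguments

a1 = TrainingArguments(output_dir="out", report_to=[])
a2 = TrainingArguments(output_dir="out", report_to=[])
# Assumption: logging_dir now defaults deterministically, so no key needs
# to be skipped when comparing the two serialized argument dicts.
assert a1.to_dict() == a2.to_dict()
```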
@@ -5433,7 +5431,6 @@ def hp_name(trial):
num_train_epochs=4,
disable_tqdm=True,
load_best_model_at_end=True,
logging_dir="runs",
run_name="test",
model_init=model_init,
)
@@ -5482,7 +5479,6 @@ def compute_objective(metrics: dict[str, float]) -> list[float]:
num_train_epochs=10,
disable_tqdm=True,
load_best_model_at_end=True,
logging_dir="runs",
run_name="test",
model_init=model_init,
compute_metrics=AlmostAccuracy(),
@@ -5572,7 +5568,6 @@ def hp_name(params):
num_train_epochs=4,
disable_tqdm=True,
load_best_model_at_end=True,
logging_dir="runs",
run_name="test",
model_init=model_init,
)
@@ -6170,7 +6165,6 @@ def model_init(config):
num_train_epochs=4,
disable_tqdm=True,
load_best_model_at_end=True,
logging_dir="runs",
run_name="test",
model_init=model_init,
)