Fix Trainer in DataParallel setting (#5685)

* Fix Trainer in DataParallel setting
* Fix typo

Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
huggingface · Jul 13, 2020 · ce374ba · ce374ba
1 parent 0a19a49
commit ce374ba
Showing 1 changed file with 6 additions and 0 deletions.
diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
@@ -618,6 +618,9 @@ def _training_step(
 
         if self.args.past_index >= 0 and self._past is not None:
             inputs["mems"] = self._past
+        # Our model outputs do not work with DataParallel, so forcing return tuple.
+        if self.args.n_gpu > 1:
+            inputs["return_tuple"] = True
 
         outputs = model(**inputs)
         loss = outputs[0]  # model outputs are always tuple in transformers (see doc)
@@ -818,6 +821,9 @@ def _prediction_loop(
                     inputs[k] = v.to(self.args.device)
             if self.args.past_index >= 0:
                 inputs["mems"] = past
+            # Our model outputs do not work with DataParallel, so forcing return tuple.
+            if self.args.n_gpu > 1:
+                inputs["return_tuple"] = True
 
             with torch.no_grad():
                 outputs = model(**inputs)