This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

fix dialogpt dual usage of END_IDX #3256

Merged
Merged 8 commits on Dec 16, 2020
Changes from 3 commits
6 changes: 6 additions & 0 deletions parlai/agents/hugging_face/dialogpt.py
@@ -26,6 +26,12 @@ class DialoGPTDecoder(GPT2Decoder):
    This decoder is initialized with the pretrained model from Hugging Face.
    """

    def __init__(self, opt, dict):
        super().__init__(opt, dict)
        if opt.get('batchsize', 1) == 1 and self.END_IDX == self.NULL_IDX:
Contributor:

When is the latter condition not going to be true? If you are inheriting from this model but changing things?

Contributor (author):

When -bs 1 --add_special_tokens True? Basically, I only want to override NULL_IDX if it's the same as END_IDX.

            # Get around the dual usage of END_IDX, which would otherwise mask the end token during the forward pass.
            self.NULL_IDX = -1
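Note: a minimal sketch of the dual-usage problem worked around here, assuming GPT-2's <|endoftext|> id (50256), which serves as both END_IDX and NULL_IDX when special tokens are not added:

    import torch

    END_IDX = NULL_IDX = 50256  # assumed: eos doubles as padding without special tokens
    tokens = torch.tensor([[15496, 995, 50256]])  # "Hello world <|endoftext|>"

    # The attention mask built from NULL_IDX also hides the end token:
    print(tokens != NULL_IDX)  # tensor([[ True,  True, False]])

    # With NULL_IDX overridden to -1, the end token stays visible:
    print(tokens != -1)        # tensor([[True, True, True]])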

    def _init_from_pretrained(self, opt):
        # load model
        model_sz = opt['gpt2_size']
1 change: 0 additions & 1 deletion parlai/agents/hugging_face/dict.py
@@ -20,7 +20,6 @@
)

SPECIAL_TOKENS = {"bos_token": "<bos>", "eos_token": "<eos>", "pad_token": "<pad>"}

NO_OP = "x"


1 change: 1 addition & 0 deletions parlai/agents/hugging_face/gpt2.py
@@ -108,6 +108,7 @@ def forward(self, input, encoder_state, incr_state=None):
            model_input = input[:, -1:]
            attention_mask = torch.cat([encoder_state, input], dim=-1) != self.NULL_IDX

        model_input = model_input.clamp_(min=0)
        transformer_outputs = self.transformer(
            model_input,
            past=incr_state,
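The added clamp presumably guards the embedding lookup: with NULL_IDX now -1, any position holding that sentinel would be an invalid embedding index, and since the attention mask already hides such positions, clamping them to 0 is harmless. A rough sketch with toy sizes, not the real model:

    import torch
    import torch.nn as nn

    emb = nn.Embedding(10, 4)      # toy vocabulary of 10 ids
    ids = torch.tensor([[3, -1]])  # -1 stands in for a masked-out position

    # emb(ids) would raise an IndexError on the -1 entry;
    # clamping first makes the lookup valid:
    out = emb(ids.clamp(min=0))
    print(out.shape)               # torch.Size([1, 2, 4])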
64 changes: 64 additions & 0 deletions tests/nightly/gpu/test_dialogpt.py
@@ -6,6 +6,12 @@

import unittest
import parlai.utils.testing as testing_utils
from parlai.core.agents import create_agent
import sys
import warnings

if not sys.warnoptions:
    warnings.simplefilter("ignore")
stephenroller marked this conversation as resolved.


@testing_utils.skipUnlessGPU
@@ -16,6 +22,64 @@ class TestDialogptModel(unittest.TestCase):
    Checks that DialoGPT gets a certain performance on the integration test task.
"""

    def _test_batchsize(self, batchsize, add_special_tokens):
        utterances = [
            'How is your day so far?',
            'I hope you you have a good day.',
            "Nice to meet you. My name is John. ",
            "I've got a feeling we're not in Kansas anymore.",
        ]
        opt = {
            'model': 'hugging_face/dialogpt',
            'gpt2_size': 'small',
            'text_truncate': 100,
            'label_truncate': 20,
            'beam_min_length': 1,
            'inference': 'beam',
            'beam_size': 1,
            'add_special_tokens': add_special_tokens,
            'batchsize': batchsize,
            'add_start_token': False,
        }
        dialogpt = create_agent(opt)

        results_single = []
        agents = [dialogpt.clone() for _ in utterances]
        for u, a in zip(utterances, agents):
            a.observe({'text': u, 'episode_done': True})
            generation = a.act()['text']
            results_single.append(generation)

        results_batched = []
        for idx in range(len(utterances) // batchsize):
            agents = [dialogpt.clone() for _ in range(batchsize)]
            batch = utterances[idx * batchsize : (idx + 1) * batchsize]
            obs = []
            for i, a in enumerate(agents):
                obs.append(a.observe({'text': batch[i], 'episode_done': True}))
            generations = [x['text'] for x in dialogpt.batch_act(obs)]
            results_batched += generations

        print(f'results_single = {results_single}')
        print(f'results_batched = {results_batched}')
        assert results_single == results_batched

    def test_batchsize(self):
        """
        Ensures dialogpt provides the same generation results regardless of batchsize.
        """
        for batchsize in [2, 2, 4, 2]:
Contributor:

  • I don't understand why batch size 2 is repeated several times.
  • Since you have 4 utterances, I think it's not a bad idea to also test with a batch size where the last batch ends up smaller than the batch size (for example, 3).

Contributor (author):

Ah, this is me testing generation consistency on randomized initialization. The PR is a work in progress.

            for add_special_tokens in [True]:
                if batchsize > 1 and not add_special_tokens:
                    continue
                with self.subTest(
                    f'test_batchsize with bs={batchsize} and add_special_token={add_special_tokens}'
                ):
                    print(
                        f'_____________test_batchsize with bs={batchsize} and add_special_token={add_special_tokens}'
                    )
                    self._test_batchsize(batchsize, add_special_tokens)

    @testing_utils.retry(ntries=3, log_retry=True)
    def test_dialogpt(self):
        valid, test = testing_utils.train_model(