facebookresearch · EricMichaelSmith · Mar 19, 2021 · Feb 21, 2021 · Feb 23, 2021 · Feb 24, 2021
diff --git a/parlai/agents/transformer/modules.py b/parlai/agents/transformer/modules.py
@@ -748,7 +748,7 @@ def forward_layers(
         tensor: torch.Tensor,
         encoder_output: torch.Tensor,
         encoder_mask: torch.Tensor,
-        incr_state: Dict[int, torch.Tensor],
+        incr_state: Dict[int, Dict[str, Dict[str, torch.Tensor]]],
     ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
         """
         Forward pass of decoder layers.
@@ -797,8 +797,9 @@ def forward(self, input, encoder_state, incr_state=None):
         encoder_output, encoder_mask = encoder_state
 
         seq_len = input.size(1)
-        positions = input.new(seq_len).long()
-        positions = torch.arange(seq_len, out=positions).unsqueeze(0)
+        positions = torch.arange(
+            seq_len, dtype=torch.long, device=input.device
+        ).unsqueeze(0)
 
         if incr_state is not None:
             # We're doing incremental decoding, so select only the most recent position

diff --git a/parlai/scripts/torchscript.py b/parlai/scripts/torchscript.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List
+
+import torch.jit
+import torch.nn as nn
+from packaging import version
+
+from parlai.core.agents import create_agent
+from parlai.core.opt import Opt
+from parlai.core.params import ParlaiParser
+from parlai.core.script import ParlaiScript, register_script
+from parlai.utils.io import PathManager
+
+
+def export_model(opt: Opt):
+    """
+    Script a ParlAI model with TorchScript and write the result to disk.
+
+    Currently, only CPU greedy-search inference on BART models is supported.
+    """
+
+    torch_too_old = version.parse(torch.__version__) < version.parse('1.7.0')
+    if torch_too_old:
+        raise NotImplementedError(
+            'TorchScript export is only supported for Torch 1.7 and higher!'
+        )
+    # Import TorchScriptGreedySearch only now: the import triggers scripting of the
+    # associated modules, so it is deferred until after the version check
+    from parlai.torchscript.modules import TorchScriptGreedySearch
+
+    forced_opts = {
+        'no_cuda': True,  # TorchScripting is CPU only
+        'model_parallel': False,  # model_parallel is not currently supported when TorchScripting
+    }
+    if 'override' not in opt:
+        opt['override'] = {}
+    for key, value in forced_opts.items():
+        opt[key] = value
+        opt['override'][key] = value
+
+    # Build the eager (unscripted) greedy-search module from the loaded agent
+    agent = create_agent(opt, requireModelExists=True)
+    original_module = TorchScriptGreedySearch(agent)
+
+    # Script a second module instance built from the same agent, then serialize it
+    scripted_module = torch.jit.script(TorchScriptGreedySearch(agent))
+    with PathManager.open(opt['scripted_model_file'], 'wb') as f:
+        torch.jit.save(scripted_module, f)
+
+    # If test inputs were given, print generations from both modules for comparison
+    if len(opt['input']) > 0:
+        inputs = opt['input'].split('|')
+        print('\nGenerating given the original unscripted module:')
+        _run_conversation(module=original_module, inputs=inputs)
+        print('\nGenerating given the scripted module:')
+        _run_conversation(module=scripted_module, inputs=inputs)
+
+
+def setup_args() -> ParlaiParser:
+    """
+    Build the parser for the export script.
+
+    Adds ``--scripted-model-file`` and ``--input`` to the ParlAI and model args.
+    """
+    parser = ParlaiParser(add_parlai_args=True, add_model_args=True)
+
+    smf_help = 'Where the scripted model checkpoint will be saved'
+    parser.add_argument(
+        '-smf', '--scripted-model-file', type=str, default='_scripted.pt', help=smf_help
+    )
+
+    input_help = "Input string to pass into the encoder of the scripted model, to test it against the unscripted version. Separate lines with a pipe"
+    parser.add_argument("-i", "--input", type=str, default='', help=input_help)
+
+    return parser
+
+
+def _run_conversation(module: nn.Module, inputs: List[str]):
+    """
+    Print each input and the module's reply to the newline-joined history so far.
+    """
+    history = []
+    for utterance in inputs:
+        print(' TEXT: ' + utterance)
+        history.append(utterance)
+        reply = module('\n'.join(history))
+        print("LABEL: " + reply)
+        history.append(reply)
+
+
+@register_script('torchscript', hidden=True)
+class TorchScript(ParlaiScript):  # script wrapper around export_model()
+    @classmethod
+    def setup_args(cls):  # delegates to the module-level setup_args()
+        return setup_args()
+
+    def run(self):  # exports the model described by self.opt
+        return export_model(self.opt)
+
+
+if __name__ == '__main__':
+    TorchScript.main()
diff --git a/parlai/torchscript/README.md b/parlai/torchscript/README.md
@@ -0,0 +1,27 @@
+# Agent exported to TorchScript (JIT compilation)
+
+This agent reads in a ParlAI agent that has been exported to TorchScript with JIT compilation, for use in greedy-search inference on CPU. This allows inference to be run on models without any ParlAI overhead, whether for tokenization or for the forward passes through the model. Currently, only BART models are supported.
+
+Sample call for exporting a BART model to TorchScript:
+```
+parlai torchscript \
+--model-file ${MODEL_FILE} \
+--model bart \
+--no-cuda \
+--scripted-model-file ~/_test_scripted_model__bart.pt \
+--input 'I am looking for a restaurant in the west part of town.|APIRESP: Restaurant 14 matches'
+```
+
+Interacting with an exported model using `parlai interactive`:
+```
+parlai interactive \
+--model-file ~/_test_scripted_model__bart.pt \
+--model parlai.torchscript.agents:TorchScriptAgent
+```
+
+Loading in and running inference on an exported model, without any ParlAI overhead:
+```
+python parlai/torchscript/scripts/test_exported_model.py \
+--scripted-model-file ~/_test_scripted_model__bart.pt \
+--input 'I am looking for a restaurant in the west part of town.|APIRESP: Restaurant 14 matches'
+```
diff --git a/parlai/torchscript/__init__.py b/parlai/torchscript/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/parlai/torchscript/agents.py b/parlai/torchscript/agents.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List
+
+import torch
+
+from parlai.core.agents import Agent
+from parlai.core.message import Message
+from parlai.core.opt import Opt
+from parlai.utils.io import PathManager
+
+
+class TorchScriptAgent(Agent):
+    """
+    ParlAI agent exported to TorchScript with JIT compilation and then loaded from disk.
+
+    Metrics and batch act are currently unsupported, and CUDA is unsupported because
+    TorchScripting is currently CPU-only.
+    """
+
+    def __init__(self, opt: Opt, shared=None):
+        super().__init__(opt=opt, shared=shared)
+        if shared is not None and 'module' in shared:
+            # Built from share(): reuse that module instead of reloading it from disk
+            self.module = shared['module']
+        else:
+            with PathManager.open(self.opt['model_file'], "rb") as f:
+                self.module = torch.jit.load(f)
+        # Track incoming history strings
+        self.history: List[str] = []
+
+    def share(self):
+        """
+        Share the scripted module object.
+        """
+        shared = super().share()
+        shared['module'] = self.module
+        return shared
+
+    def observe(self, observation: Message) -> Message:
+        # TODO: support self._validate_observe_invariants() method of TorchAgent
+        # Record the incoming text, then defer to the base-class observe()
+        self.history.append(observation['text'])
+        return super().observe(observation)
+
+    def self_observe(self, self_message: Message) -> None:
+        # TODO: support self._validate_self_observe_invariants() method of TorchAgent
+        assert self.observation is not None
+        if self.observation['episode_done']:
+            # oh this was the last example in the episode. reset the history
+            self.history = []
+            # additionally mark the last observation as invalid
+            self.observation = None
+        else:
+            self.history.append(self_message['text'])
+
+    def reset(self):
+        super().reset()
+        self.history = []
+
+    def act(self) -> Message:
+        response_text = self.module('\n'.join(self.history))
+        response = Message({'text': response_text, 'episode_done': False})
+        # self.observation will determine if we're going onto a new episode
+        self.self_observe(response)
+        return response