In [28]:
import time
import torch
import fairseq
from fairseq.models.transformer import TransformerModel
from fairseq.sequence_generator import EnsembleModel

In [3]:
TEST_TEXT = 'Companies and LSPs can translate their content with the ModernMT service in many languages ' \
            'directly on their production environment thanks to our simple RESTful API .'
TEST_TEXT_TARGET = 'Unternehmen und Sprachdienstleister können dank unserer einfachen RESTful API ihre ' \
            'Inhalte mit dem ModernMT Service in viele Sprachen direkt in ihre Produktionsumgebung übersetzen .'
DOG_TEXT = 'I love my dog'
test_text = TEST_TEXT  

# sample = torch.Tensor([[93, 4397, 4, 491, 9971, 22, 5673, 2]])  # This is tokenised 'Hello'

#### Do not use `torch.hub.load` as it will likely get down latest version of Fairseq

#### Load the downloaded translator model
*This will actually create an instance of 'fairseq.hub_utils.GeneratorHubInterface'

In [4]:
mmt_hub_generator = TransformerModel.from_pretrained(
  'model/en__it',
  checkpoint_file='model.pt',
  bpe='subword_nmt'
)

2021-02-05 13:17:09 | INFO | fairseq.file_utils | loading archive file model/en__it


KeyError: 'mmt_translation'

In [8]:
hub_generator = TransformerModel.from_pretrained(
  'model/wmt16.en-de.joined-dict.transformer',
  checkpoint_file='model.pt',
  bpe='subword_nmt',  # This one is important and improves translation, without it some tokens return <unk>
  bpe_codes='model/wmt16.en-de.joined-dict.transformer/bpecodes'
)

2021-02-05 13:41:45 | INFO | fairseq.file_utils | loading archive file model/wmt16.en-de.joined-dict.transformer
2021-02-05 13:41:56 | INFO | fairseq.tasks.translation | [en] dictionary: 32768 types
2021-02-05 13:41:56 | INFO | fairseq.tasks.translation | [de] dictionary: 32768 types
2021-02-05 13:41:59 | INFO | fairseq.models.fairseq_model | Namespace(activation_dropout=0.0, activation_fn='relu', adam_betas='(0.9, 0.98)', adam_eps=1e-08, adaptive_input=False, adaptive_softmax_cutoff=None, adaptive_softmax_dropout=0, arch='transformer_wmt_en_de_big', attention_dropout=0.1, batch_size=None, bpe='subword_nmt', bpe_codes='model/wmt16.en-de.joined-dict.transformer/bpecodes', bpe_separator='@@', clip_norm=0.0, criterion='label_smoothed_cross_entropy', cross_self_attention=False, data='/home/ubuntu/work/mmt/model/wmt16.en-de.joined-dict.transformer', decoder_attention_heads=16, decoder_embed_dim=1024, decoder_embed_path=None, decoder_ffn_embed_dim=4096, decoder_input_dim=1024, decoder_layerd

In [9]:
print(f'Type of [en2de] is {type(hub_generator)}')
print(f'Type of [en2de.models[0]] is {type(hub_generator.models[0])}')

Type of [en2de] is <class 'fairseq.hub_utils.GeneratorHubInterface'>
Type of [en2de.models[0]] is <class 'fairseq.models.transformer.TransformerModel'>


In [91]:
class HubTranslateWrapper(torch.nn.Module):
    def __init__(self, generator):
        super(HubTranslateWrapper, self).__init__()
        self.generator = generator

    def encode(self, text):
        return self.generator.encode(text)
    
    def decode(self, x):
        return self.generator.decode(x)
    
    def forward(self, x):
        
        # We need to pass a text to translate
        # We cannot return a string here, otherwise the trace fails with "Only tensors, lists, 
        # tuples of tensors, or dictionary of tensors can be output from traced functions"
        return self.encode(self.generator.translate(self.decode(x)))
    
def benchmark(generator, encoder, decoder, test_text, times=3):
    i = 0
    print(f'== Running the same translation {times} times ==')
    while i < times:
        begin_ts = time.time()
        decoder(generator(encoder(test_text)))
        print('- executed in {:.6f}s'.format(time.time() - begin_ts))
        i += 1
        
def benchmark_no_encode(func, test_input, times=3, batch_size=1):
    i = 0
    if batch_size == 1:
        print(f'== Running single translation {times} times ==')
    else:
        print(f'== Running batches of {batch_size} translations {times} times ==')
    while i < times:
        begin_ts = time.time()
        for j in range(batch_size):
            func(test_input)
        print('- executed {} translations in {:.6f}s'.format(batch_size, time.time() - begin_ts))
        i += 1

In [14]:
print(hub_generator.encode(TEST_TEXT))
print(hub_generator.encode(TEST_TEXT_TARGET))

tensor([26999,     9,   212,  3854,    71,    73, 18842,   113,  1306,    26,
            6, 13994,   267,   210,   710,   457,     7,   255,  3571,  1582,
           22,   113,   830,  1122,  2591,    12,    77,  2230, 18043,   175,
         3111, 17819,     5,     2])
tensor([  490,    13,  6155,  7835, 29862,   103,  4951,   363,  7588, 18043,
          175,  3111, 17819,   233,  4195,    25,    53, 13994,   267,   210,
          710,   990,     7,   483,  4709,  1056,     7,   233, 15578,  5716,
        13995, 26497,     5,     2])


In [15]:
translated = hub_generator.translate(test_text)
print(f'+ Using [fairseq.hub_utils.GeneratorHubInterface.translate]:\n - {translated}')

sampled = hub_generator.sample(test_text)
print(f'+ Using [fairseq.hub_utils.GeneratorHubInterface.sample]:\n - {sampled}')

hub_translate_wrapper = HubTranslateWrapper(hub_generator)
hub_translate_wrapper_encoded = hub_translate_wrapper.forward(hub_generator.encode(test_text))
hub_translate_wrapper_tranlated = hub_generator.decode(hub_translate_wrapper_encoded)
print(f'+ Using [GeneratorHubInterfaceWrapper.forward]:\n - {hub_translate_wrapper_tranlated}')
is_match = "==" if hub_translate_wrapper_tranlated == translated else "!="
print(f' {is_match} [fairseq.hub_utils.GeneratorHubInterface.translate]')

+ Using [fairseq.hub_utils.GeneratorHubInterface.translate]:
 - Unternehmen und Sprachdienstleister können dank unserer einfachen RESTful API ihre Inhalte mit dem ModernMT Service in viele Sprachen direkt in ihre Produktionsumgebung übersetzen .
+ Using [fairseq.hub_utils.GeneratorHubInterface.sample]:
 - Unternehmen und Sprachdienstleister können dank unserer einfachen RESTful API ihre Inhalte mit dem ModernMT Service in viele Sprachen direkt in ihre Produktionsumgebung übersetzen .
+ Using [GeneratorHubInterfaceWrapper.forward]:
 - Unternehmen und Sprachdienstleister können dank unserer einfachen RESTful API ihre Inhalte mit dem ModernMT Service in viele Sprachen direkt in ihre Produktionsumgebung übersetzen .
 == [fairseq.hub_utils.GeneratorHubInterface.translate]


In [92]:
benchmark(hub_translate_wrapper.forward, hub_generator.encode, hub_generator.decode, test_text)

== Running the same translation 3 times ==
- executed in 1.072636s
- executed in 1.061057s
- executed in 1.062300s


#### JIT Trace

In [17]:
traced_hub_translate = torch.jit.trace(hub_translate_wrapper, hub_generator.encode(test_text))

  return tensor.item()
  if i == self.unk():
  if idx < len(self.symbols):
  return self.symbols[idx]
  self.src_lengths = torch.tensor(-1)
  res = values[0].new(len(values), size).fill_(pad_idx)
  copy_tensor(v, res[i][size - len(v) :] if left_pad else res[i][: len(v)])
  [s["source"].ne(pad_idx).long().sum() for s in samples]
  ntokens = src_lengths.sum().item()
  int(self.max_len_a * src_len + self.max_len_b),
  if self.weights is None or max_pos > self.weights.size(0):
  assert embed_dim == self.embed_dim
  assert list(query.size()) == [tgt_len, bsz, embed_dim]
  [torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)],
  False for i in range(bsz)
  if self.cross_self_attention or prev_output_tokens.eq(self.padding_idx).any():
  assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len]
  assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
  assert key_padding_mask.size(0) == bsz
  assert key_padding_mask.size(1) == src_len


  lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs)
  lprobs = lprobs[:, ::beam_size, :].contiguous()
  lprobs.view(bsz, -1).size(1) - 1,  # -1 so we never select pad
  lprobs.view(bsz, -1).size(1) - 1,  # -1 so we never select pad
  eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(eos_mask)
  assert num_remaining_sent >= 0
  if num_remaining_sent == 0:
  cand_offsets[: eos_mask.size(1)],
  assert (~cands_to_ignore).any(dim=1).all()
  ) == new_order.size(0):
  for i in range(bbsz_idx.size()[0]):
  sent = unfin_idx + cum_unfin[unfin_idx]
  seen = str(sent.item()) + "_" + str(unfin_idx.item())
  if len(finalized[sent]) < beam_size:
  finalized[sent].append(
  [float(elem["score"].item()) for elem in finalized[sent]]
  [float(elem["score"].item()) for elem in finalized[sent]]
  finalized[sent] = [finalized[sent][ssi] for ssi in sorted_scores_indices]
  for id, hypos in zip(batch["id"].tolist(), translations):


In [20]:
# TODO: this does not work right, always return the same value, even if an empty tensor is given
traced_hub_translate_encoded = traced_hub_translate(torch.Tensor([[]]))
# traced_hub_translate_encoded = traced_hub_translate(hub_generator.encode(test_text))
traced_hub_translate_translated = hub_generator.decode(traced_hub_translate_encoded)
print(f'+ Using [JIT Traced]:\n - {traced_hub_translate_translated}')
is_match = "==" if traced_hub_translate_translated == translated else "!="
print(f' {is_match} [fairseq.hub_utils.GeneratorHubInterface.translate]')

+ Using [JIT Traced]:
 - Unternehmen und Sprachdienstleister können dank unserer einfachen RESTful API ihre Inhalte mit dem ModernMT Service in viele Sprachen direkt in ihre Produktionsumgebung übersetzen .
 == [fairseq.hub_utils.GeneratorHubInterface.translate]


In [44]:
benchmark(traced_generator, hub_generator.encode, hub_generator.decode, test_text)

== Running the same translation 5 times ==
- executed in 0.001740s
- executed in 0.001297s
- executed in 0.001398s
- executed in 0.001393s
- executed in 0.001374s


#### Trace `GeneratorHubInterface.generate`

In [92]:
class HubGenerateWrapper(torch.nn.Module):
    def __init__(self, generator):
        super(HubGenerateWrapper, self).__init__()
        self.generator = generator

    def forward(self, x):

        return self.generator.generate(x)[0]['tokens']

In [93]:
hub_generate_wrapper = HubGenerateWrapper(hub_generator)
hub_generate_wrapper_encoded = hub_generate_wrapper.forward(hub_generator.encode(test_text))
print(hub_generate_wrapper_encoded)

# print(f'+ Using [GeneratorHubInterfaceWrapper.forward]:\n - {hub_translate_wrapper_tranlated}')
# is_match = "==" if hub_translate_wrapper_tranlated == translated else "!="
# print(f' {is_match} [fairseq.hub_utils.GeneratorHubInterface.translate]')

tensor([  490,    13,  6155,  7835, 29862,   103,  4951,   363,  7588, 18043,
          175,  3111, 17819,   233,  4195,    25,    53, 13994,   267,   210,
          710,   990,     7,   483,  4709,  1056,     7,   233, 15578,  5716,
        13995, 26497,     5,     2])


In [97]:
traced_hub_generate = torch.jit.trace(hub_generate_wrapper, hub_generator.encode(test_text))

In [101]:
traced_hub_generate(hub_generator.encode(test_text))

tensor([  490,    13,  6155,  7835, 29862,   103,  4951,   363,  7588, 18043,
          175,  3111, 17819,   233,  4195,    25,    53, 13994,   267,   210,
          710,   990,     7,   483,  4709,  1056,     7,   233, 15578,  5716,
        13995, 26497,     5,     2])

In [104]:
benchmark(traced_hub_generate, hub_generator.encode, hub_generator.decode, test_text)

== Running the same translation 5 times ==
- executed in 1.104018s
- executed in 0.928745s
- executed in 0.925975s
- executed in 0.924407s
- executed in 0.925427s


In [None]:
# This fails
traced_hub_generate = torch.neuron.trace(hub_generate_wrapper, hub_generator.encode(test_text))

#### Neuron Trace

In [38]:
import torch.neuron

In [39]:
neuron_generator = torch.neuron.trace(generator_wrapper, hub_generator.encode(test_text))

  return tensor.item()
  if i == self.unk():
  if idx < len(self.symbols):
  return self.symbols[idx]
  self.src_lengths = torch.tensor(-1)
  res = values[0].new(len(values), size).fill_(pad_idx)
  copy_tensor(v, res[i][size - len(v) :] if left_pad else res[i][: len(v)])
  [s["source"].ne(pad_idx).long().sum() for s in samples]
  ntokens = src_lengths.sum().item()
  int(self.max_len_a * src_len + self.max_len_b),
  if self.weights is None or max_pos > self.weights.size(0):
  assert embed_dim == self.embed_dim
  assert list(query.size()) == [tgt_len, bsz, embed_dim]
  [torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)],
  False for i in range(bsz)
  if self.cross_self_attention or prev_output_tokens.eq(self.padding_idx).any():
  assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len]
  assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
  assert key_padding_mask.size(0) == bsz
  assert key_padding_mask.size(1) == src_len


  lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs)
  lprobs = lprobs[:, ::beam_size, :].contiguous()
  lprobs.view(bsz, -1).size(1) - 1,  # -1 so we never select pad
  lprobs.view(bsz, -1).size(1) - 1,  # -1 so we never select pad
  eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(eos_mask)
  assert num_remaining_sent >= 0
  if num_remaining_sent == 0:
  cand_offsets[: eos_mask.size(1)],
  assert (~cands_to_ignore).any(dim=1).all()
  ) == new_order.size(0):
  for i in range(bbsz_idx.size()[0]):
  sent = unfin_idx + cum_unfin[unfin_idx]
  seen = str(sent.item()) + "_" + str(unfin_idx.item())
  if len(finalized[sent]) < beam_size:
  finalized[sent].append(
  [float(elem["score"].item()) for elem in finalized[sent]]
  [float(elem["score"].item()) for elem in finalized[sent]]
  finalized[sent] = [finalized[sent][ssi] for ssi in sorted_scores_indices]
  for id, hypos in zip(batch["id"].tolist(), translations):


RuntimeError: 
Module/Function 'GeneratorHubInterfaceWrapper' contains in-place operator aten::div_#800 with pattern view->view->inplace
which cannot be automatically converted to out-of-place operators. Because compiler
loves permuting operator execution order for the purpose of performance optimization,
we shamelessly ask you to rewrite your model so that it is free of in-place operators.
For example, the following forward function

```
def forward(self, tensor):
    tensor = tensor.clone()
    torch.sigmoid_(tensor[..., :3])
    output = torch.tanh(tensor[2:])
    return output
```

may be rewritten into the following

```
def forward(self, tensor):
    tensor = tensor.clone()
    temp = torch.sigmoid(tensor[..., :3])
    new_tensor = torch.cat(temp, tensor[..., 3:])
    output = torch.tanh(new_tensor[2:])
    return output
```


- Compilation fails with 'Module/Function 'GeneratorHubInterfaceWrapper' contains in-place operator aten::div_#800 with pattern view->view->inplace which cannot be automatically converted to out-of-place operators'
- The same for trace of `hub_generate_wrapper`

### Try to wrap the ModernMT motel into GeneratorHubInterface
***Didn't work***

In [59]:
import sys
sys.path.append('src')

In [62]:
from mmt.checkpoint import CheckpointRegistry
from mmt.decoder import Suggestion, ModelConfig, MMTDecoder

In [65]:
# SequenceGenerator as a minimum needs a list of models and a target dictionary
device = None
config = ModelConfig.load('model')
builder = CheckpointRegistry.Builder()
for name, checkpoint_path in config.checkpoints:
    builder.register(name, checkpoint_path)
checkpoints = builder.build(device)
mmt_generator = MMTDecoder(checkpoints, device=device)

2021-02-05 11:13:44 | INFO | fairseq.tasks.translation | [sl] dictionary: 32896 types
2021-02-05 11:13:44 | INFO | fairseq.tasks.translation | [tl] dictionary: 32896 types


In [70]:
test_text = TEST_TEXT  #'I love my dog'
mmt_translated = hub_generator.translate(test_text)
print(f'+ Using [GeneratorHubInterface] with ModernMT model:\n - {mmt_translated}')

+ Using [GeneratorHubInterface] with ModernMT model:
 - Unternehmen und Sprachdienstleister können dank unserer einfachen RESTful API ihre Inhalte mit dem ModernMT Service in viele Sprachen direkt in ihre Produktionsumgebung übersetzen .


Above blows up with "shape '[1, -1, 32768]' is invalid for input of size 164480"

## Try tracing only models

*Use the EN-DE model*

In [73]:
class ModelForwardEncoderWrapper(torch.nn.Module):
    def __init__(self, model):
        super(ModelForwardEncoderWrapper, self).__init__()
        self.model = model

    def forward(self, x):

        return self.model.forward_encoder({
            'src_tokens': x, 
            'src_lengths': torch.LongTensor([t.numel() for t in x])})[0].encoder_out

In [82]:
test_text = TEST_TEXT
test_text_encoded = hub_generator.encode(test_text)
test_text_encoded = test_text_encoded.reshape([1, -1])
test_text_encoded

tensor([[26999,     9,   212,  3854,    71,    73, 18842,   113,  1306,    26,
             6, 13994,   267,   210,   710,   457,     7,   255,  3571,  1582,
            22,   113,   830,  1122,  2591,    12,    77,  2230, 18043,   175,
          3111, 17819,     5,     2]])

In [76]:
model_forward_encoder_wrapper = ModelForwardEncoderWrapper(EnsembleModel([hub_generator.models[0]]))
model_forward_encoder_wrapper_encoded = model_forward_encoder_wrapper.forward(test_text_encoded)
print(model_forward_encoder_wrapper_encoded)

tensor([[[ 0.0653,  0.0351, -0.0476,  ...,  0.0112, -0.0564,  0.1521]],

        [[-0.0361, -0.0188,  0.0328,  ..., -0.0113,  0.0097,  0.0463]],

        [[-0.0060, -0.0125, -0.0390,  ...,  0.0148,  0.0915, -0.1332]],

        ...,

        [[-0.0399,  0.0138, -0.0705,  ..., -0.0054, -0.0221, -0.0650]],

        [[ 0.0008,  0.0015,  0.0063,  ..., -0.0041, -0.0090, -0.0198]],

        [[ 0.0008,  0.0015,  0.0063,  ..., -0.0042, -0.0090, -0.0197]]],
       grad_fn=<NativeLayerNormBackward>)


In [93]:
benchmark_no_encode(model_forward_encoder_wrapper.forward, test_text_encoded, batch_size=30)

== Running batches of 30 translations 3 times ==
- executed 30 translations in 1.185574s
- executed 30 translations in 1.134813s
- executed 30 translations in 1.132835s


#### JIT Trace

In [77]:
jit_model_forward_encoder_wrapper = torch.jit.trace(model_forward_encoder_wrapper, test_text_encoded)

In [96]:
jit_model_forward_encoder_wrapper(test_text_encoded)

tensor([[[ 0.0653,  0.0351, -0.0476,  ...,  0.0112, -0.0564,  0.1521]],

        [[-0.0361, -0.0188,  0.0328,  ..., -0.0113,  0.0097,  0.0463]],

        [[-0.0060, -0.0125, -0.0390,  ...,  0.0148,  0.0915, -0.1332]],

        ...,

        [[-0.0399,  0.0138, -0.0705,  ..., -0.0054, -0.0221, -0.0650]],

        [[ 0.0008,  0.0015,  0.0063,  ..., -0.0041, -0.0090, -0.0198]],

        [[ 0.0008,  0.0015,  0.0063,  ..., -0.0042, -0.0090, -0.0197]]],
       grad_fn=<NativeLayerNormBackward>)

In [97]:
benchmark_no_encode(jit_model_forward_encoder_wrapper, test_text_encoded, batch_size=30)

== Running batches of 30 translations 3 times ==
- executed 30 translations in 1.067263s
- executed 30 translations in 1.059980s
- executed 30 translations in 1.063120s


#### Neuron Trace

In [99]:
import torch.neuron

In [100]:
neuron_model_forward_encoder_wrapper = torch.neuron.trace(model_forward_encoder_wrapper, test_text_encoded)

  if self.weights is None or max_pos > self.weights.size(0):
  assert embed_dim == self.embed_dim
  assert list(query.size()) == [tgt_len, bsz, embed_dim]


NotImplementedError: Please implement aten::Bool in native_ops/aten.py