In [1]:
import torch
from gector.gec_model import GecBERTModel
from gector.seq2labels_model import Seq2Labels
from allennlp.nn import util

  from .autonotebook import tqdm as notebook_tqdm
  LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'


In [21]:
# Load the RoBERTa-based GECToR checkpoint together with its output vocabulary.
roberta_gec = GecBERTModel(
    vocab_path='data/output_vocabulary/',
    model_paths=['model/roberta_1_gectorv2.th'],
    is_ensemble=True,
)
# XLNet variant (disabled):
# xlnet_gec = GecBERTModel(vocab_path='data/output_vocabulary/', model_paths=['model/xlnet_0_gectorv2.th'], is_ensemble=True)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [22]:
# Sample sentences with grammatical errors, tokenised on whitespace.
# NOTE(review): `input` shadows the Python builtin, but later cells read this
# name, so it is kept as-is here.
input = [
    sentence.split()
    for sentence in (
        'I wants play video games with you',
        'You know I loves you',
        'You said that you do not like me before I tell you that I love you so I very sad',
    )
]

# Run the model on the preprocessed batches, then turn its predictions back
# into corrected token lists (displayed as the cell output).
p, i, e = roberta_gec.predict(roberta_gec.preprocess(input))
roberta_gec.postprocess_batch(input, p, i, e)
# XLNet variant (disabled):
# p, i, e = xlnet_gec.predict(xlnet_gec.preprocess(input))
# xlnet_gec.postprocess_batch(input, p, i, e)

[['I', 'want', 'play', 'video', 'games', 'with', 'you'],
 ['You', 'know', 'I', 'love', 'you'],
 ['You',
  'said',
  'that',
  'you',
  'did',
  'not',
  'like',
  'me',
  'before',
  'I',
  'told',
  'you',
  'that',
  'I',
  'love',
  'you',
  ',',
  'so',
  'I',
  'am',
  'very',
  'sad',
  '.']]

In [24]:
class gec(torch.nn.Module):
    """Wrapper giving the tagging model a plain tensors-in / tensors-out signature.

    ``forward`` takes three positional tensors, packs them into the
    ``{'bert', 'bert-offsets', 'mask'}`` dict that the wrapped model consumes,
    and returns two entries of the model's output mapping as a tuple.
    """

    def __init__(self, model, model_) -> None:
        """Store the wrapped objects.

        Args:
            model: Module called with the token dict built in ``forward``; its
                result must contain the keys ``'class_probabilities_labels'``
                and ``'max_error_probability'``.
            model_: Companion object kept on the wrapper. ``forward`` does not
                use it; it is only referenced by the disabled alternative
                return path noted there.
        """
        super().__init__()

        self.model = model
        self.model_ = model_

    def forward(self, bert, bert_offsets, mask):
        """Run the wrapped model on the three input tensors.

        Args:
            bert: Tensor stored under the ``'bert'`` key of the token dict.
            bert_offsets: Tensor stored under the ``'bert-offsets'`` key.
            mask: Tensor stored under the ``'mask'`` key.

        Returns:
            A ``(class_probabilities_labels, max_error_probability)`` tuple
            taken from the wrapped model's output mapping.
        """
        token = {
            'bert': bert,
            'bert-offsets': bert_offsets,
            'mask': mask
        }
        # Call the module instance rather than ``.forward`` directly:
        # ``nn.Module.__call__`` runs registered hooks, which a direct
        # ``.forward`` call silently skips.
        outputs = self.model(token)
        return outputs['class_probabilities_labels'], outputs['max_error_probability']
        # Alternative (disabled): return self.model_._convert([outputs])

# Rebuild a standalone Seq2Labels tagger that reuses the vocabulary and the
# embedder factory of the already-loaded `roberta_gec` predictor.
# NOTE(review): confidence / del_confidence are both set to 0 here; confirm
# this matches the settings intended for the exported graph.
model = Seq2Labels(
    vocab=roberta_gec.vocab,
    text_field_embedder=roberta_gec._get_embbeder('roberta-base', 1),
    confidence=0,
    del_confidence=0,
)

# map_location='cpu' restores the checkpoint tensors on the host regardless of
# the device they were saved from; the wrapper is moved to the GPU below.
# NOTE(review): strict=False tolerates key mismatches silently. Inspect the
# value returned by load_state_dict (missing / unexpected keys) if the outputs
# ever look wrong.
model.load_state_dict(
    torch.load('model/roberta_1_gectorv2.th', map_location='cpu'),
    strict=False,
)

# Freshly constructed modules start in training mode. Switch to eval mode so
# inference and the ONNX export see deterministic, inference-time behaviour.
roberta_gec_onnx = gec(model, roberta_gec).to('cuda:0').eval()
# XLNet variant (disabled):
# xlnet_gec_onnx = gec(model, xlnet_gec).to('cuda:0')

In [25]:
# Take the first preprocessed batch, convert it to a tensor dict, move it to
# device 0 and keep only the 'tokens' entry (the model's input tensors).
first_batch = roberta_gec.preprocess(input)[0]
batch_on_device = util.move_to_device(first_batch.as_tensor_dict(), 0)
tokens = batch_on_device['tokens']
# XLNet variant (disabled):
# tokens = util.move_to_device(xlnet_gec.preprocess(input)[0].as_tensor_dict(), 0)['tokens']
print(tokens)

{'bert': tensor([[50265,    38,  1072,   310,   569,   426,    19,    47,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0],
        [50265,   370,   216,    38,  6138,    47,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0],
        [50265,   370,    26,    14,    47,   109,    45,   101,   162,   137,
            38,  1137,    47,    14,    38,   657,    47,    98,    38,   182,
          5074]], device='cuda:0'), 'bert-offsets': tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0],
        [ 0,  1,  2,  3,  4,  5,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19, 20]], device='cuda:0'), 'mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1

In [26]:
# Sanity-check the wrapper in PyTorch before exporting; the returned tuple is
# displayed as the cell output.
roberta_gec_onnx(
    bert=tokens['bert'],
    bert_offsets=tokens['bert-offsets'],
    mask=tokens['mask'],
)

(tensor([[[9.9964e-01, 2.1862e-06, 2.0243e-07,  ..., 1.4559e-10,
           4.5591e-05, 2.5049e-16],
          [9.9878e-01, 5.4107e-05, 1.4484e-07,  ..., 4.4247e-12,
           3.6149e-05, 4.5273e-16],
          [1.0072e-02, 5.0254e-04, 3.3500e-05,  ..., 6.9566e-09,
           6.4263e-04, 2.0264e-15],
          ...,
          [9.9964e-01, 2.1862e-06, 2.0243e-07,  ..., 1.4559e-10,
           4.5591e-05, 2.5049e-16],
          [9.9964e-01, 2.1862e-06, 2.0243e-07,  ..., 1.4559e-10,
           4.5591e-05, 2.5049e-16],
          [9.9964e-01, 2.1862e-06, 2.0243e-07,  ..., 1.4559e-10,
           4.5591e-05, 2.5049e-16]],
 
         [[9.9840e-01, 3.8396e-06, 2.4193e-07,  ..., 4.3083e-09,
           1.6794e-04, 2.4708e-15],
          [9.9802e-01, 2.9587e-04, 4.1409e-07,  ..., 7.7475e-11,
           4.6562e-05, 9.4058e-16],
          [9.8774e-01, 2.3780e-04, 1.3869e-04,  ..., 6.0532e-10,
           2.6357e-04, 1.2621e-15],
          ...,
          [9.9840e-01, 3.8396e-06, 2.4193e-07,  ..., 4.308

In [27]:
# Names given to the graph outputs; they mirror the two values returned by the
# wrapper's forward().
# (An earlier, disabled variant named three outputs: 'probs', 'idx', 'error_probs'.)
output_names = ['class_probabilities_labels', 'max_error_probability']

# Names given to the graph inputs, in the order of forward()'s parameters.
input_names = ['bert', 'bert-offsets', 'mask']

# Axes left symbolic in the exported graph, keyed by input / output name.
dynamic_axes = {
    # inputs
    'bert': {0: 'batch_0', 1: 'sequence_0'},
    'bert-offsets': {0: 'batch_1', 1: 'sequence_1'},
    'mask': {0: 'batch_2', 1: 'sequence_2'},
    # outputs
    'class_probabilities_labels': {0: 'batch_3', 1: 'sequence_3'},
    'max_error_probability': {0: 'batch_4'},
}

# Example inputs passed to the exporter, in forward() order.
example_inputs = (tokens['bert'], tokens['bert-offsets'], tokens['mask'])

torch.onnx.export(
    roberta_gec_onnx,
    example_inputs,
    "onnx/roberta-base-old.onnx",
    input_names=input_names,
    output_names=output_names,
    dynamic_axes=dynamic_axes,
    opset_version=12,
)

In [29]:
import onnxruntime
import numpy as np

# Copy the three input tensors to host memory as NumPy arrays for onnxruntime.
bert, bert_offsets, mask = (
    tokens[key].cpu().numpy() for key in ('bert', 'bert-offsets', 'mask')
)

# Open the exported graph, requesting the CUDA provider first and the CPU
# provider second.
execution_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
x_onnx = onnxruntime.InferenceSession(
    'onnx/roberta-base-old.onnx',
    providers=execution_providers,
)

In [30]:
# Feed the NumPy inputs under the names used at export time. Passing None as
# the first argument requests all graph outputs; the result is displayed as
# the cell output.
onnx_inputs = {
    'bert': bert,
    'bert-offsets': bert_offsets,
    'mask': mask,
}
x_onnx.run(None, input_feed=onnx_inputs)

[array([[[9.9963760e-01, 2.1861717e-06, 2.0242754e-07, ...,
          1.4558639e-10, 4.5591009e-05, 2.5048564e-16],
         [9.9877530e-01, 5.4107371e-05, 1.4484240e-07, ...,
          4.4247331e-12, 3.6149275e-05, 4.5273212e-16],
         [1.0071833e-02, 5.0253852e-04, 3.3500317e-05, ...,
          6.9565953e-09, 6.4262724e-04, 2.0263830e-15],
         ...,
         [9.9963760e-01, 2.1861717e-06, 2.0242754e-07, ...,
          1.4558639e-10, 4.5591009e-05, 2.5048564e-16],
         [9.9963760e-01, 2.1861717e-06, 2.0242754e-07, ...,
          1.4558639e-10, 4.5591009e-05, 2.5048564e-16],
         [9.9963760e-01, 2.1861717e-06, 2.0242754e-07, ...,
          1.4558639e-10, 4.5591009e-05, 2.5048564e-16]],
 
        [[9.9839586e-01, 3.8395838e-06, 2.4193085e-07, ...,
          4.3083128e-09, 1.6794422e-04, 2.4707896e-15],
         [9.9801528e-01, 2.9587277e-04, 4.1409226e-07, ...,
          7.7474978e-11, 4.6561909e-05, 9.4057556e-16],
         [9.8774081e-01, 2.3779625e-04, 1.3869278e-04, 