In [4]:
from gliner import GLiNER

# Relative path to the local GLiNER checkpoint directory. It is handed to
# DataProcess below; the ONNX file loaded later lives under the same directory.
model_path = "../models/gliner_medium-v2.1"

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
from src.word_process.inputs_outputs_process import DataProcess

# Project helper built from the checkpoint directory. Later cells use it to
# prepare the model inputs (prepare_model_inputs) and to decode the raw model
# scores back into labelled spans (decode).
data_process = DataProcess(model_path)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
# Sample passage used as the single document for entity extraction.
text = """
Libretto by Marius Petipa, based on the 1822 novella ``Trilby, ou Le Lutin d'Argail`` by Charles Nodier, first presented by the Ballet of the Moscow Imperial Bolshoi Theatre on January 25/February 6 (Julian/Gregorian calendar dates), 1870, in Moscow with Polina Karpakova as Trilby and Ludiia Geiten as Miranda and restaged by Petipa for the Imperial Ballet at the Imperial Bolshoi Kamenny Theatre on January 17–29, 1871 in St. Petersburg with Adèle Grantzow as Trilby and Lev Ivanov as Count Leopold.
"""

# Entity types the model is asked to find in the passage.
labels = [
    "person",
    "book",
    "location",
    "date",
    "actor",
    "character",
]

In [26]:
# Prepare a batch of one document for the requested labels.
# `inputs` is indexed by input name in the inference cells below
# ('input_ids', 'attention_mask', ...); `raw_batch` supplies the "tokens" and
# "id_to_classes" entries that the decode step reads.
inputs, raw_batch = data_process.prepare_model_inputs([text], labels)

In [28]:
import torch
import onnxruntime

# Resolve the ONNX graph relative to `model_path` instead of repeating the
# directory as a second hard-coded literal, so the pre-processing helper and
# the exported model always come from the same checkpoint directory.
onnx_model_path = f"{model_path}/model.onnx"
ort_sess = onnxruntime.InferenceSession(onnx_model_path)

# Names of the inputs fed to the session. Each one is looked up in `inputs`
# and converted with .numpy() (presumably torch tensors — confirm against
# DataProcess.prepare_model_inputs).
ONNX_INPUT_NAMES = (
    "input_ids",
    "attention_mask",
    "words_mask",
    "text_lengths",
    "span_idx",
    "span_mask",
)
onnx_feed = {name: inputs[name].numpy() for name in ONNX_INPUT_NAMES}

# Passing None as the output list asks the session for all outputs; only the
# first one is kept here.
outputs = ort_sess.run(None, onnx_feed)[0]
outputs

array([[[[ -4.7636776 ,  -4.2976203 ,  -4.116477  ,  -7.035563  ,
           -9.894087  ,  -7.4880915 ],
         [ -8.262269  , -12.352715  , -11.220077  , -10.343983  ,
          -13.137346  , -14.714043  ],
         [-10.471894  , -15.731391  , -13.373329  , -15.038684  ,
          -16.880022  , -17.588394  ],
         ...,
         [ -8.046515  ,  -4.714387  ,  -8.437423  , -10.586559  ,
          -15.44794   , -13.28521   ],
         [-13.631119  , -19.325733  , -19.001099  , -17.626635  ,
          -21.054552  , -24.232693  ],
         [-13.784239  , -19.712738  , -19.218586  , -16.724798  ,
          -20.799793  , -24.24158   ]],

        [[ -8.033905  , -14.901958  ,  -9.891759  , -10.122342  ,
          -12.113761  , -11.662863  ],
         [-11.870149  , -20.119553  , -14.889593  , -16.738356  ,
          -18.160854  , -18.726141  ],
         [ -6.759687  ,  -9.722449  , -10.42029   , -12.322544  ,
          -11.859332  , -13.656987  ],
         ...,
         [-12.509389  , -

In [29]:
import torch

# The ONNX session produced a NumPy array, so wrap it in a tensor before
# handing it to decode.
# NOTE(review): the TypeError recorded beneath this cell reports an ndarray
# reaching sigmoid(), which does not match this code — it looks like stale
# output from an earlier run; confirm and clear it.
span_scores = torch.tensor(outputs)

# NOTE: this rebinds `outputs` from the raw score array to the decoded result,
# so the cell cannot simply be re-run; re-run the ONNX inference cell first.
outputs = data_process.decode(
    raw_batch["tokens"],
    raw_batch["id_to_classes"],
    span_scores,
)

TypeError: sigmoid(): argument 'input' (position 1) must be Tensor, not numpy.ndarray

: 

In [10]:
# Decoded result for the first (and only) document in the batch. The recorded
# output is a list of (int, int, label, score) tuples — presumably start/end
# token indices of each span; confirm against DataProcess.decode.
outputs[0]

[(12, 12, 'character', 0.9745467305183411),
 (33, 33, 'location', 0.9113682508468628),
 (38, 42, 'date', 0.7391266226768494),
 (51, 51, 'date', 0.5650821924209595),
 (54, 54, 'location', 0.9242361187934875),
 (56, 57, 'actor', 0.9268550276756287),
 (59, 59, 'character', 0.9865744709968567),
 (61, 62, 'actor', 0.8973000645637512),
 (64, 64, 'character', 0.7989766597747803),
 (80, 85, 'date', 0.9063624143600464),
 (87, 89, 'location', 0.9150590300559998),
 (91, 92, 'actor', 0.9332297444343567),
 (94, 94, 'character', 0.9905427098274231),
 (96, 97, 'actor', 0.9347705245018005),
 (99, 100, 'character', 0.8545386791229248)]

In [23]:
import bentoml

# Fetch the ONNX model tagged "ner:latest" through BentoML and wrap it in a runner.
ner_bento_model = bentoml.onnx.get("ner:latest")
runner = ner_bento_model.to_runner()

# Start the runner in-process. BentoML itself warns (see the output below)
# that init_local is for debugging and testing only.
runner.init_local()

'Runner.init_local' is for debugging and testing only. Make sure to remove it before deploying to production.


In [24]:
# The runner takes the model inputs positionally, so the order of the names
# below is significant and mirrors the order of the original call.
runner_input_names = (
    "input_ids",
    "attention_mask",
    "words_mask",
    "text_lengths",
    "span_idx",
    "span_mask",
)
runner_args = [inputs[key].numpy() for key in runner_input_names]
output = runner.run.run(*runner_args)

In [25]:
# Display the raw runner result. The recorded output is a tuple whose first
# element is a score array with the same leading values as the direct
# onnxruntime run above.
output

(array([[[[ -4.7636776 ,  -4.2976203 ,  -4.116477  ,  -7.035563  ,
            -9.894087  ,  -7.4880915 ],
          [ -8.262269  , -12.352715  , -11.220077  , -10.343983  ,
           -13.137346  , -14.714043  ],
          [-10.471894  , -15.731391  , -13.373329  , -15.038684  ,
           -16.880022  , -17.588394  ],
          ...,
          [ -8.046515  ,  -4.714387  ,  -8.437423  , -10.586559  ,
           -15.44794   , -13.28521   ],
          [-13.631119  , -19.325733  , -19.001099  , -17.626635  ,
           -21.054552  , -24.232693  ],
          [-13.784239  , -19.712738  , -19.218586  , -16.724798  ,
           -20.799793  , -24.24158   ]],
 
         [[ -8.033905  , -14.901958  ,  -9.891759  , -10.122342  ,
           -12.113761  , -11.662863  ],
          [-11.870149  , -20.119553  , -14.889593  , -16.738356  ,
           -18.160854  , -18.726141  ],
          [ -6.759687  ,  -9.722449  , -10.42029   , -12.322544  ,
           -11.859332  , -13.656987  ],
          ...,
    