# Inference
---

In [1]:
import numpy as np
from os.path import join


## Load vocab

In [2]:
vocab_dir = "../data/"
vocab_file = "vocab.txt"
vocab = np.loadtxt(join(vocab_dir, vocab_file), dtype=str)


In [None]:
print(vocab[:10])


## Load model

In [3]:
from utils.model import NERModel

2022-09-22 13:42:31.514590: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-09-22 13:42:37.216020: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-09-22 13:42:37.216463: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory
  from .autonotebook import tqdm as notebook_tqdm


In [4]:
model = NERModel(tags=5, vocab_size=len(vocab), predict=True)
model.init_from_file("../models/model.pkl.gz", weights_only=True)


((((),
   ((), ((), (((), (), ()), ()))),
   (((),
     array([[ 0.04834103,  0.00655262, -0.03937843, ..., -0.02282618,
              0.05089082, -0.01925213],
            [ 0.03651088,  0.05338385, -0.01430334, ..., -0.05976418,
             -0.0312531 ,  0.056483  ],
            [-0.02776174,  0.04745229,  0.01275015, ..., -0.07472883,
             -0.03755604, -0.06483809],
            ...,
            [ 0.01899912, -0.01843667,  0.0620845 , ...,  0.01042803,
              0.03759629,  0.04589635],
            [ 0.06259501,  0.05849517, -0.04939735, ...,  0.00050123,
             -0.01864036,  0.00301137],
            [ 0.05299981,  0.0506544 ,  0.00274936, ...,  0.03125998,
             -0.02276541, -0.00567416]], dtype=float32),
     (),
     ((),
      array([[ 0.03682806,  0.96291196, -0.03645538, ...,  1.0366589 ,
               0.03620976,  1.0309874 ],
             [ 0.87828434,  0.5034949 ,  0.78642917, ...,  1.0366427 ,
               0.0365635 ,  1.0350322 ],
            

## Create sentence

In [5]:
from trax.data import tokenize, detokenize


In [6]:
sentence = "ZAPATILLAS LA SPORTIVA MUTANT MUJER Ã“PALO AGUA 40 Agua"

In [7]:
from trax.data.tf_inputs import vocab_size

In [8]:
size = vocab_size(
    vocab_type="char", vocab_file=vocab_file, vocab_dir=vocab_dir
)


In [11]:
tokenized = list(
    tokenize(
        iter([sentence]),
        vocab_dir=vocab_dir,
        vocab_file=vocab_file,
        vocab_type="char",
    )
)[0]
tokenized = tokenized[None, :]  # Batch dimension


## Infer

In [12]:
from trax.supervised.decoding import autoregressive_sample

In [13]:
tokenized_output = autoregressive_sample(model, tokenized, temperature=0.0)
tokenized_output = tokenized_output[0][:-1]

In [14]:
output = detokenize(
    tokenized_output,
    vocab_dir=vocab_dir,
    vocab_file=vocab_file,
)


In [15]:
print(output)



