# Test Models

In [1]:
import datasets
import torch
import numpy as np
import time
from transformers import AutoModel, AutoTokenizer
from matplotlib import pyplot as plt

%load_ext autoreload
%autoreload 2
%matplotlib inline

# Load a Model

In [2]:
# Single source of truth for the checkpoint name, so the model and the
# tokenizer cannot accidentally be loaded from two different checkpoints.
MODEL_NAME = 'sentence-transformers/all-mpnet-base-v2'

model = AutoModel.from_pretrained(MODEL_NAME)
# Freeze every weight in one call; this is equivalent to looping over
# model.parameters() and setting requires_grad=False on each parameter.
model.requires_grad_(False)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Test Output

In [31]:
# Encode a two-sentence batch as PyTorch tensors. With padding enabled the
# shorter sentence is padded to the batch length, and attention_mask marks
# the padded positions with 0.
tokens = tokenizer(
    ['hello there', 'how are you'],
    return_tensors='pt',
    padding=True,
    truncation=True,
    max_length=256,
)
print(tokens)

# Forward pass: keep both the per-token hidden states and the pooled output.
output = model(**tokens)
token_embeddings = output.last_hidden_state
sentence_embeddings = output.pooler_output

# Boolean NumPy copy of the attention mask, used to drop padded positions.
mask = tokens['attention_mask'].to(torch.bool).cpu().numpy()

# For the first sentence, compare the mean of its non-padded token embeddings
# with the pooled output (first 10 dimensions of each).
# NOTE(review): the recorded output shows these two vectors differ, so
# pooler_output is not a masked mean of the token embeddings — confirm which
# of the two is the intended sentence embedding before relying on either.
print(token_embeddings[0][mask[0]].mean(dim=0)[:10])
print(sentence_embeddings[0][:10])

{'input_ids': tensor([[   0, 7596, 2049,    2,    1],
        [   0, 2133, 2028, 2021,    2]]), 'attention_mask': tensor([[1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1]])}
tensor([ 0.0960,  0.1087, -0.0512, -0.0275,  0.1791, -0.0571, -0.0415,  0.0808,
         0.0232,  0.1003])
tensor([-0.0349, -0.0268, -0.0715, -0.0212, -0.0618,  0.0105, -0.0404, -0.0501,
        -0.0493, -0.0160])
tensor([[[True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True]],

        [[True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True]]])
tensor([[[ 0.0325, -0.0889, -0.0546,  ...,  0.1759, -0.1717, -0.0519],
         [ 0.10