In [1]:
import torch
import torch.nn as nn
from transformers import AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
vocab_size = tokenizer.vocab_size
embedding_dim = 64

In [3]:
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.hidden_layer = nn.Linear(embedding_dim, embedding_dim)
        self.final_layer = nn.Linear(embedding_dim, 1)
        
    def forward(self, input_ids, attention_mask=None):
        # [batch_size, seq_len] -> [batch_size, seq_len, embedding_dim]
        embeddings = self.embedding(input_ids)
        
        # [batch_size, seq_len, embedding_dim] -> [batch_size, seq_len, embedding_dim]
        out1 = self.hidden_layer(embeddings)
        
        # [batch_size, seq_len, embedding_dim] -> [batch_size, seq_len, 1]
        out2 = self.final_layer(out1)
        
        # [batch_size, seq_len, 1] -> [batch_size, 1]
        pooled = torch.mean(out2, dim=1)
        
        return pooled.squeeze(-1)

# inference example
def loss(outputs):
    return torch.sum(outputs)

model = SimpleModel()


In [4]:

# inference example
texts = ["Hello, how are you?", "I am fine, thank you!"]
tokens = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length = 10)
tokens

{'input_ids': tensor([[ 101, 7592, 1010, 2129, 2024, 2017, 1029,  102,    0],
        [ 101, 1045, 2572, 2986, 1010, 4067, 2017,  999,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [7]:
outputs = model(tokens["input_ids"])
print(outputs)

tensor([-0.0727, -0.1011], grad_fn=<SqueezeBackward1>)


In [8]:
loss(outputs)

tensor(-0.1738, grad_fn=<SumBackward0>)