In [1]:
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [2]:
# Name of the pretrained checkpoint. The tokenizer and the
# sequence-classification model are both loaded from this same name.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

In [3]:
# Example sentence fed to both the hand-built and the built-in tokenization paths.
sequence = "I've been waiting for a HuggingFace course my whole life."

## Make tokens by hand

In [4]:
# Split the sentence into tokens and map each token to its vocabulary id.
tokens = tokenizer.tokenize(sequence)
ids = tokenizer.convert_tokens_to_ids(tokens)
# Add the classification and separator special tokens as well. Their ids are
# read from the tokenizer itself instead of tokenizing the literal strings
# "[CLS]" / "[SEP]", so the cell does not depend on how a particular
# checkpoint spells its special tokens. Both stay one-element lists so they
# can be concatenated with `ids`.
cls_id = [tokenizer.cls_token_id]
sep_id = [tokenizer.sep_token_id]
# unsqueeze(0) adds a leading batch dimension of size 1.
input_ids = torch.tensor(cls_id + ids + sep_id).unsqueeze(0)

In [5]:
# Forward pass on the hand-built ids; the logits are shown as the cell output.
model(input_ids=input_ids).logits

tensor([[-1.5607,  1.6123]], grad_fn=<AddmmBackward0>)

## Use the built-in approach

In [6]:
# Call the tokenizer directly on the sentence; return_tensors="pt" asks for
# tensor output, ready to be passed to the model.
tokenized_inputs = tokenizer(sequence, return_tensors="pt")

In [7]:
# Unpack the tokenizer output as keyword arguments to the model. The recorded
# logits are identical to those from the hand-built ids.
model(**tokenized_inputs).logits

tensor([[-1.5607,  1.6123]], grad_fn=<AddmmBackward0>)

In [8]:
# Display the ids assembled by hand, for comparison with the tokenizer's output.
input_ids

tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102]])

In [9]:
# Display the ids produced by the tokenizer call, for comparison with the hand-built ones.
tokenized_inputs["input_ids"]

tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102]])

## Use a single sequence multiple times to make a "batch"

In [10]:
# Stack the same (1, seq_len) ids twice along the batch axis and run the model
# on the resulting two-row batch.
model(torch.cat([input_ids, input_ids], dim=0)).logits

tensor([[-1.5607,  1.6123],
        [-1.5607,  1.6123]], grad_fn=<AddmmBackward0>)

## Padding

In [14]:
# Two toy id sequences of different lengths, each filled with the id 200.
sequence1_ids = np.full(3, 200)
sequence2_ids = np.full(2, 200)
# Extend the shorter sequence with the tokenizer's pad id so that both rows
# have the same length, then put the two rows into one 2-D array.
batched_ids = np.array(
    [
        sequence1_ids,
        np.concatenate((sequence2_ids, [tokenizer.pad_token_id])),
    ]
)

In [15]:
# Show the two unpadded toy sequences, one per line.
print(sequence1_ids, sequence2_ids, sep="\n")

[200 200 200]
[200 200]


In [17]:
# Run each toy sequence on its own (with a batch dimension of 1 added), then
# the padded batch. No attention mask is passed, and in the recorded output
# the batch's second row differs from the result for sequence 2 alone.
print(model(torch.as_tensor(sequence1_ids).unsqueeze(0)).logits)
print(model(torch.as_tensor(sequence2_ids).unsqueeze(0)).logits)
print(model(torch.as_tensor(batched_ids)).logits)

tensor([[ 1.5694, -1.3895]], grad_fn=<AddmmBackward0>)
tensor([[ 0.5803, -0.4125]], grad_fn=<AddmmBackward0>)
tensor([[ 1.5694, -1.3895],
        [ 1.3374, -1.2163]], grad_fn=<AddmmBackward0>)


## Attention masking

In [22]:
# Mark real tokens with 1 and padding positions with 0. Compare against the
# tokenizer's pad id (the same value used to pad `batched_ids`) instead of
# assuming padding is id 0, so the mask stays right for a non-zero pad id.
attention_mask = (batched_ids != tokenizer.pad_token_id).astype("int")
print(attention_mask)

[[1 1 1]
 [1 1 0]]


In [23]:
# Forward pass on the padded batch, this time passing the attention mask
# alongside the ids.
outputs = model(
    input_ids=torch.as_tensor(batched_ids),
    attention_mask=torch.as_tensor(attention_mask),
)

In [24]:
# With the attention mask applied, the second row's recorded logits equal those
# obtained when the shorter sequence was run alone, without padding.
print(outputs.logits)

tensor([[ 1.5694, -1.3895],
        [ 0.5803, -0.4125]], grad_fn=<AddmmBackward0>)
