# Handling multiple sequences (PyTorch)

Install the Transformers, Datasets, and Evaluate libraries to run this notebook.

In [1]:
!pip install datasets evaluate transformers[sentencepiece]

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

sequence = "I've been waiting for a HuggingFace course my whole life."

tokens = tokenizer.tokenize(sequence)
ids = tokenizer.convert_tokens_to_ids(tokens)
input_ids = torch.tensor([ids])
# This line will fail.
model(input_ids)

SequenceClassifierOutput(loss=None, logits=tensor([[-2.7276,  2.8789]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [5]:
tokenized_inputs = tokenizer(sequence, return_tensors="pt")
print(tokenized_inputs["input_ids"])

tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102]])


In [10]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

sequence = "I've been waiting for a HuggingFace course my whole life."

tokens = tokenizer.tokenize(sequence)
ids = tokenizer.convert_tokens_to_ids(tokens)

input_ids = torch.tensor([ids])
print("Input IDs:", input_ids)

output = model(input_ids)
print("Logits:", output.logits)

Input IDs: tensor([[ 1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607,
          2026,  2878,  2166,  1012]])
Logits: tensor([[-2.7276,  2.8789]], grad_fn=<AddmmBackward0>)


In [11]:
batched_idss = [ids, ids]
input2= torch.tensor(batched_idss)

output2 = model(input_ids=input2)
print(output2.logits)



tensor([[-2.7276,  2.8789],
        [-2.7276,  2.8789]], grad_fn=<AddmmBackward0>)


In [12]:
batched_ids = [
    [200, 200, 200],
    [200, 200]
]

In [13]:
padding_id = 100

batched_ids = [
    [200, 200, 200],
    [200, 200, padding_id],
]

In [None]:
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

sequence1_ids = [[200, 200, 200]]
sequence2_ids = [[200, 200]]
batched_ids = [
    [200, 200, 200],
    [200, 200, tokenizer.pad_token_id],
]

print(model(torch.tensor(sequence1_ids)).logits)
print(model(torch.tensor(sequence2_ids)).logits)
print(model(torch.tensor(batched_ids)).logits)

tensor([[ 1.5694, -1.3895]], grad_fn=<AddmmBackward>)
tensor([[ 0.5803, -0.4125]], grad_fn=<AddmmBackward>)
tensor([[ 1.5694, -1.3895],
        [ 1.3373, -1.2163]], grad_fn=<AddmmBackward>)

In [16]:
batched_ids = [
    [200, 200, 200],
    [200, 200, tokenizer.pad_token_id],
]

attention_mask = [
    [1, 1, 1],
    [1, 1, 0],
]

outputs = model(torch.tensor(batched_ids), attention_mask=torch.tensor(attention_mask))
print(outputs.logits)

tensor([[ 1.5694, -1.3895],
        [ 0.5803, -0.4125]], grad_fn=<AddmmBackward0>)


In [70]:
def add_padding(pad_token,input_ids):
  max_len = len(max(input_ids, key=len))
  attention_mask=[]
  for idx,seq in enumerate(input_ids):
    seq_len=len(seq)
    seq_len_diff = max_len - seq_len

    if seq_len_diff > 0:
      input_ids[idx].extend([pad_token] * seq_len_diff)
      attention_mask.append([1] * seq_len + [0]*seq_len_diff)
    else:
      attention_mask.append([1] * seq_len)

  return (input_ids,attention_mask)


In [92]:
raw_inputs=[
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much!",
]


ids=[]
for sequence in raw_inputs:
  seq_tokens=tokenizer.tokenize(sequence)
  seq_ids=tokenizer.convert_tokens_to_ids(seq_tokens)
  ids.append(seq_ids)

padded_inputs,attention_mask=add_padding(tokenizer.pad_token_id,ids)

seq1_output=model(torch.tensor(padded_inputs[0]).unsqueeze(0),
                  attention_mask=torch.tensor(attention_mask[0]).unsqueeze(0)
                  )
seq2_output=model(
    torch.tensor(padded_inputs[1]).unsqueeze(0),
    attention_mask=torch.tensor(attention_mask[1]).unsqueeze(0)
    )

print(seq1_output.logits)
print(seq1_output.logits)
# Sentence 1 Logits:
# tensor([[-2.7276,  2.8789]], grad_fn=<AddmmBackward0>)
# Sentence 2 Logits
# tensor([[-2.7276,  2.8789]], grad_fn=<AddmmBackward0>)


batch_seq_output = model(torch.tensor(padded_inputs), attention_mask=torch.tensor(attention_mask))
print(batch_seq_output.logits)
#Batch logits:
# tensor([[-2.7276,  2.8789],
        # [ 3.1931, -2.6685]], grad_fn=<AddmmBackward0>)


tensor([[-2.7276,  2.8789]], grad_fn=<AddmmBackward0>)
tensor([[-2.7276,  2.8789]], grad_fn=<AddmmBackward0>)
tensor([[-2.7276,  2.8789],
        [ 3.1931, -2.6685]], grad_fn=<AddmmBackward0>)


In [83]:
print(torch.tensor([1,2,3]).shape)
print(torch.tensor([[1,2,3]]).shape)
print(torch.tensor([1,2,3]).unsqueeze(0).shape)

torch.Size([3])
torch.Size([1, 3])
torch.Size([1, 3])


In [None]:
sequence = sequence[:max_sequence_length]