# Handling Multiple Sequences

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification 

  from .autonotebook import tqdm as notebook_tqdm


## The model expects a batch of inputs

In [2]:
# Checkpoint identifier shared by the tokenizer and the classifier below.
checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'
# Load the tokenizer and the sequence-classification model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

In [3]:
# Long multi-line sample text; it is tokenized both manually and via
# tokenizer(...) in the cells that follow.
poem = '''
Two roads diverged in a yellow wood,
And sorry I could not travel both
And be one traveler, long I stood
And looked down one as far as I could
To where it bent in the undergrowth;
Then took the other, as just as fair,
And having perhaps the better claim
Because it was grassy and wanted wear,
Though as for that the passing there
Had worn them really about the same,
And both that morning equally lay
In leaves no step had trodden black.
Oh, I kept the first for another day!
Yet knowing how way leads on to way
I doubted if I should ever come back.
I shall be telling this with a sigh
Somewhere ages and ages hence:
Two roads diverged in a wood, and I,
I took the one less traveled by,
And that has made all the difference.
'''

In [4]:
# Manual two-step tokenization: split the text into tokens, then map each
# token to its vocabulary id.
# NOTE: ids produced this way start at 2048, whereas the tokenizer(poem)
# output further down starts with an extra leading id 101.
tokens = tokenizer.tokenize(poem)
ids = tokenizer.convert_tokens_to_ids(tokens)

In [6]:
# Build a tensor straight from the flat id list: 1-D, no batch dimension.
input_ids = torch.tensor(ids)

In [11]:
# model(input_ids)
## This call fails because input_ids is a 1-D tensor with no batch dimension

In [12]:
# Calling the tokenizer directly with return_tensors="pt" returns PyTorch
# tensors that already carry a batch dimension (see the [[...]] output below).
tokenized_inputs = tokenizer(poem, return_tensors="pt")

In [14]:
# Inspect the ids produced by the tokenizer call: a 2-D tensor with one row.
tokenized_inputs['input_ids']

tensor([[  101,  2048,  4925, 17856,  5999,  1999,  1037,  3756,  3536,  1010,
          1998,  3374,  1045,  2071,  2025,  3604,  2119,  1998,  2022,  2028,
         20174,  1010,  2146,  1045,  2768,  1998,  2246,  2091,  2028,  2004,
          2521,  2004,  1045,  2071,  2000,  2073,  2009,  6260,  1999,  1996,
          2104, 26982,  1025,  2059,  2165,  1996,  2060,  1010,  2004,  2074,
          2004,  4189,  1010,  1998,  2383,  3383,  1996,  2488,  4366,  2138,
          2009,  2001, 22221,  1998,  2359,  4929,  1010,  2295,  2004,  2005,
          2008,  1996,  4458,  2045,  2018,  6247,  2068,  2428,  2055,  1996,
          2168,  1010,  1998,  2119,  2008,  2851,  8053,  3913,  1999,  3727,
          2053,  3357,  2018, 19817,  7716,  4181,  2304,  1012,  2821,  1010,
          1045,  2921,  1996,  2034,  2005,  2178,  2154,   999,  2664,  4209,
          2129,  2126,  5260,  2006,  2000,  2126,  1045, 12979,  2065,  1045,
          2323,  2412,  2272,  2067,  1012,  1045,  

In [15]:
# Wrap ids in an outer list so the tensor has shape (1, sequence_length),
# i.e. a batch containing a single sequence.
input_ids = torch.tensor([ids])

In [16]:
# Forward pass on the batched (2-D) input_ids.
output = model(input_ids)

In [17]:
# Show the batched tensor: a single row, i.e. a batch of one sequence.
input_ids

tensor([[ 2048,  4925, 17856,  5999,  1999,  1037,  3756,  3536,  1010,  1998,
          3374,  1045,  2071,  2025,  3604,  2119,  1998,  2022,  2028, 20174,
          1010,  2146,  1045,  2768,  1998,  2246,  2091,  2028,  2004,  2521,
          2004,  1045,  2071,  2000,  2073,  2009,  6260,  1999,  1996,  2104,
         26982,  1025,  2059,  2165,  1996,  2060,  1010,  2004,  2074,  2004,
          4189,  1010,  1998,  2383,  3383,  1996,  2488,  4366,  2138,  2009,
          2001, 22221,  1998,  2359,  4929,  1010,  2295,  2004,  2005,  2008,
          1996,  4458,  2045,  2018,  6247,  2068,  2428,  2055,  1996,  2168,
          1010,  1998,  2119,  2008,  2851,  8053,  3913,  1999,  3727,  2053,
          3357,  2018, 19817,  7716,  4181,  2304,  1012,  2821,  1010,  1045,
          2921,  1996,  2034,  2005,  2178,  2154,   999,  2664,  4209,  2129,
          2126,  5260,  2006,  2000,  2126,  1045, 12979,  2065,  1045,  2323,
          2412,  2272,  2067,  1012,  1045,  4618,  

In [18]:
# Logits for the batch: one row per input sequence.
output.logits

tensor([[ 1.7040, -1.4993]], grad_fn=<AddmmBackward0>)

### Multiple inputs

In [19]:
# Build a batch of two sequences by listing the same id list twice.
batched_ids = [ids, ids]

In [20]:
# Both rows have the same length, so they stack into a (2, sequence_length) tensor.
batched_input_ids = torch.tensor(batched_ids)

In [21]:
# Forward pass on the two-row batch; the cell displays the full model output.
model(batched_input_ids)

SequenceClassifierOutput(loss=None, logits=tensor([[ 1.7040, -1.4993],
        [ 1.7040, -1.4993]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

## Padding the inputs

In [22]:
# Ragged batch: one row of three ids and one row of two ids.
batched_ids = [[200] * 3, [200] * 2]

In [23]:
# Placeholder id appended to the shorter row so both rows have length 3.
padding_id = 100
batched_ids = [[200] * 3, [200] * 2 + [padding_id]]

In [24]:
# Look up the id this tokenizer uses for padding.
tokenizer.pad_token_id

0

In [25]:
# Batch of one: a single three-id sequence.
sequence1_ids = [[200] * 3]

In [26]:
# Batch of one: a single two-id sequence.
sequence2_ids = [[200] * 2]

In [27]:
# The same two sequences as one rectangular batch: the shorter row is padded
# with the tokenizer's own pad id.
batched_ids = [[200, 200, 200],
               [200, 200, tokenizer.pad_token_id]]

In [28]:
# Logits for the three-id sequence run on its own.
model(torch.tensor(sequence1_ids)).logits

tensor([[ 1.5694, -1.3895]], grad_fn=<AddmmBackward0>)

In [29]:
# Logits for the two-id sequence run on its own (no padding involved).
model(torch.tensor(sequence2_ids)).logits

tensor([[ 0.5803, -0.4125]], grad_fn=<AddmmBackward0>)

In [30]:
# Logits for the padded batch WITHOUT an attention mask. The second row
# differs from the sequence2_ids result above, i.e. the pad token affected it.
model(torch.tensor(batched_ids)).logits

tensor([[ 1.5694, -1.3895],
        [ 1.3374, -1.2163]], grad_fn=<AddmmBackward0>)

## Attention Masks

In [43]:
# Padded batch again; the last position of the second row is padding.
batched_ids = [
    [200, 200, 200], 
    [200, 200, tokenizer.pad_token_id]
]

In [44]:
# Mask aligned element-for-element with batched_ids: 1 marks a real token,
# 0 marks the padded position in the second row.
attention_mask = [[1] * 3, [1] * 2 + [0]]

In [47]:
# Forward pass with the attention mask supplied. The second row of the logits
# now equals the result obtained for sequence2_ids on its own.
outputs = model(torch.tensor(batched_ids), attention_mask=torch.tensor(attention_mask))
outputs.logits

tensor([[ 1.5694, -1.3895],
        [ 0.5803, -0.4125]], grad_fn=<AddmmBackward0>)

## Try it out

In [67]:
# Two short sentences of different lengths, used to practise padding + masking.
proverb1 = "Absence makes the heart grow fonder"
proverb2 = "Honesty is the best policy"

In [68]:
# Manually tokenize the first proverb and convert its tokens to ids.
tokens1 = tokenizer.tokenize(proverb1)
ids1 = tokenizer.convert_tokens_to_ids(tokens1)

In [75]:
# Manually tokenize the second proverb and convert its tokens to ids.
tokens2 = tokenizer.tokenize(proverb2)
ids2 = tokenizer.convert_tokens_to_ids(tokens2)

In [70]:
# Show the ids of the first proverb together with how many there are.
ids1, len(ids1)

([6438, 3084, 1996, 2540, 4982, 13545, 2121], 7)

In [71]:
# Show the ids of the second proverb together with how many there are.
ids2, len(ids2)

([16718, 2003, 1996, 2190, 3343], 5)

In [72]:
# Reference result: run the first proverb alone as a batch of one.
outputs1 = model(torch.tensor([ids1]))
outputs1.logits

tensor([[-3.9582,  4.2422]], grad_fn=<AddmmBackward0>)

In [73]:
# Reference result: run the second proverb alone as a batch of one.
outputs2 = model(torch.tensor([ids2]))
outputs2.logits

tensor([[-3.7858,  4.1988]], grad_fn=<AddmmBackward0>)

#### Let's batch the two inputs

In [76]:
# Start from all-ones masks, one entry per token id.
# NOTE: this must run while ids2 is still unpadded; if ids2 has already been
# extended with padding, attention_mask2 would wrongly mark the pads as 1.
attention_mask1 = [1] * len(ids1)
attention_mask2 = [1] * len(ids2)

In [77]:
# Mask for the first proverb: all ones.
attention_mask1

[1, 1, 1, 1, 1, 1, 1]

In [78]:
# Mask for the second proverb before padding: all ones, shorter than attention_mask1.
attention_mask2

[1, 1, 1, 1, 1]

In [79]:
# Zero-pad the shorter mask up to the length of attention_mask1.
# The pad count is derived from the two lengths rather than hard-coded, so
# re-running this cell is a no-op once the lengths match (a negative or zero
# gap yields an empty list). Assumes attention_mask1 is the longer mask.
attention_mask2.extend([0] * (len(attention_mask1) - len(attention_mask2)))

In [80]:
# Mask for the second proverb after padding: trailing zeros mark pad positions.
attention_mask2

[1, 1, 1, 1, 1, 0, 0]

In [81]:
# The longer sequence needs no padding; shown for comparison with ids2.
ids1

[6438, 3084, 1996, 2540, 4982, 13545, 2121]

In [83]:
# Pad ids2 with the tokenizer's pad id up to the length of ids1.
# The pad count is derived from the two lengths rather than hard-coded, so
# re-running this cell is a no-op once the lengths match (a negative or zero
# gap yields an empty list). Assumes ids1 is the longer sequence.
ids2.extend([tokenizer.pad_token_id] * (len(ids1) - len(ids2)))

In [84]:
# ids2 after padding: same length as ids1, with pad ids at the end.
ids2

[16718, 2003, 1996, 2190, 3343, 0, 0]

In [85]:
# Batched forward pass over both proverbs with their attention masks. The
# resulting logits match the two individual runs (outputs1 / outputs2) above.
model(torch.tensor([ids1, ids2]), attention_mask=torch.tensor([attention_mask1, attention_mask2]))

SequenceClassifierOutput(loss=None, logits=tensor([[-3.9582,  4.2422],
        [-3.7858,  4.1988]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)