# Behind the pipeline

## The pipeline

In [1]:
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
classifier = pipeline('sentiment-analysis')

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [3]:
classifier([
    'The new software update includes several bug fixes and performance improvements.',
    'The new software update needs several bug fixes and performance improvements.'
])

[{'label': 'NEGATIVE', 'score': 0.9167097210884094},
 {'label': 'NEGATIVE', 'score': 0.9994910955429077}]

The first sentence should have been marked as positive as there are `fixes and performance improvements that have been already included with the software update`

## The Tokenizer

In [4]:
from transformers import AutoTokenizer

In [5]:
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [6]:
raw_inputs = [
    "She walked her dog in the park and then went grocery shopping.",
    "Despite initial challenges, the experience proved to be quite educational."
]

In [7]:
input = tokenizer(raw_inputs, padding=True, truncation=True); input

{'input_ids': [[101, 2016, 2939, 2014, 3899, 1999, 1996, 2380, 1998, 2059, 2253, 13025, 6023, 1012, 102], [101, 2750, 3988, 7860, 1010, 1996, 3325, 4928, 2000, 2022, 3243, 4547, 1012, 102, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]]}

#### Using `return_tensors` as pytorch

In [8]:
input = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt"); input

{'input_ids': tensor([[  101,  2016,  2939,  2014,  3899,  1999,  1996,  2380,  1998,  2059,
          2253, 13025,  6023,  1012,   102],
        [  101,  2750,  3988,  7860,  1010,  1996,  3325,  4928,  2000,  2022,
          3243,  4547,  1012,   102,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]])}

#### Using `return_tensors` as numpy

In [9]:
tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="np")

{'input_ids': array([[  101,  2016,  2939,  2014,  3899,  1999,  1996,  2380,  1998,
         2059,  2253, 13025,  6023,  1012,   102],
       [  101,  2750,  3988,  7860,  1010,  1996,  3325,  4928,  2000,
         2022,  3243,  4547,  1012,   102,     0]]), 'attention_mask': array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]])}

## The Model without Head

In [10]:
from transformers import AutoModel

In [11]:
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

model = AutoModel.from_pretrained(checkpoint)

In [12]:
output = model(**input)

In [13]:
output.last_hidden_state.shape

torch.Size([2, 15, 768])

In [14]:
output["last_hidden_state"].shape

torch.Size([2, 15, 768])

In [15]:
output[0].shape

torch.Size([2, 15, 768])

## The Model with Head

In [16]:
from transformers import AutoModelForSequenceClassification

In [17]:
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

In [18]:
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
output = model(**input)

In [19]:
output.logits.shape

torch.Size([2, 2])

In [20]:
output.logits

tensor([[ 2.8401, -2.4250],
        [-3.5235,  3.6534]], grad_fn=<AddmmBackward0>)

## Softmax

In [21]:
import torch

predictions = torch.nn.functional.softmax(output.logits, dim=-1)
predictions

tensor([[9.9486e-01, 5.1425e-03],
        [7.6348e-04, 9.9924e-01]], grad_fn=<SoftmaxBackward0>)

In [22]:
model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}

In [23]:
torch.argmax(predictions, dim=-1)

tensor([0, 1])