In [10]:
# Install the two libraries this notebook imports (transformers, torch) via the %pip magic.
# NOTE(review): no versions are pinned, so a fresh install may pull different releases.
%pip install transformers torch

Note: you may need to restart the kernel to use updated packages.


In [2]:
from transformers import pipeline

# Name the checkpoint and revision explicitly instead of relying on the
# task's implicit default, so the notebook keeps loading the same weights
# even if the library's default for 'sentiment-analysis' changes.
# These are the values the default resolved to when this notebook was run.
classifier = pipeline(
    'sentiment-analysis',
    model='distilbert/distilbert-base-uncased-finetuned-sst-2-english',
    revision='714eb0f',
)

# The bare call is the cell's last expression, so its result is displayed:
# one {'label', 'score'} dict per input sentence.
classifier([
    'I have been waiting for a Hugging Face course my whole life.',
    'I hate this so much!',
])

  from .autonotebook import tqdm as notebook_tqdm
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use mps:0


[{'label': 'POSITIVE', 'score': 0.9980935454368591},
 {'label': 'NEGATIVE', 'score': 0.9994558691978455}]

```mermaid
flowchart LR
    Tokenizer --> Model --> PP[Post Processing]
```

In [3]:
from transformers import AutoTokenizer

# Identifier of the checkpoint whose tokenizer is loaded below.
checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'

# Fetch the tokenizer that belongs to that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=checkpoint,
)

In [4]:
# Example sentences; later cells reuse this list when reporting predictions.
raw_inputs = [
    'I have been waiting for a Hugging Face course my whole life.',
    'I hate this so much!',
]

# Tokenize both sentences as one batch:
#   padding=True       -> the shorter sequence is padded to the longest one
#   truncation=True    -> truncation is enabled for over-long sequences
#   return_tensors='pt' -> the result holds PyTorch tensors
inputs = tokenizer(
    raw_inputs,
    return_tensors='pt',
    truncation=True,
    padding=True,
)
print(inputs)

{'input_ids': tensor([[  101,  1045,  2031,  2042,  3403,  2005,  1037, 17662,  2227,  2607,
          2026,  2878,  2166,  1012,   102],
        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,
             0,     0,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]])}


In [5]:
from transformers import AutoModel

# Same checkpoint identifier the tokenizer was loaded from.
checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'

# Load the model with the generic AutoModel class; the next cell inspects
# its `last_hidden_state` output.
model = AutoModel.from_pretrained(
    pretrained_model_name_or_path=checkpoint,
)

In [6]:
# Unpack the tokenizer's output as keyword arguments for the forward pass,
# then report the size of the resulting hidden-state tensor.
outputs = model(**inputs)
print(outputs.last_hidden_state.size())

torch.Size([2, 15, 768])


In [7]:
from transformers import AutoModelForSequenceClassification

checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'

# Load the same checkpoint through the sequence-classification class.
# Note: this rebinds `model` and `outputs`, replacing the objects created by
# the earlier AutoModel cells; the cells that follow read `outputs.logits`.
model = AutoModelForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path=checkpoint,
)
outputs = model(**inputs)

In [8]:
# Size of the logits tensor produced by the classification model.
print(outputs.logits.size())

torch.Size([2, 2])


In [9]:
# Show the logits themselves; these are not probabilities yet — a later cell
# applies softmax to them.
print(outputs.logits)

tensor([[-3.0584,  3.2022],
        [ 4.1692, -3.3464]], grad_fn=<AddmmBackward0>)


In [11]:
import torch

# Turn the logits into probabilities by applying softmax over the last
# dimension, so each row sums to 1.
predictions = torch.softmax(outputs.logits, dim=-1)
print(predictions)

tensor([[1.9064e-03, 9.9809e-01],
        [9.9946e-01, 5.4419e-04]], grad_fn=<SoftmaxBackward0>)


In [12]:
# Display the model config's mapping from class index to label name, which
# tells us what each column of `predictions` stands for.
model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}

In [23]:
# Print, for every example sentence, the probability assigned to each class.
# The loop variable is called `sentence` rather than `input`: at notebook top
# level a loop variable lives in the kernel's global namespace, so `input`
# would shadow the builtin for every cell executed afterwards.
for sentence, prediction in zip(raw_inputs, predictions):
    print(f'Sentence: {sentence}')
    # Look each label up by its class index instead of pairing scores with
    # `id2label.values()`, which silently depends on the dict's iteration
    # order matching the column order of `prediction`.
    for class_id, score in enumerate(prediction):
        label = model.config.id2label[class_id]
        print(f'  {label}: {score*100:.1f}%')

Sentence: I have been waiting for a Hugging Face course my whole life.
  NEGATIVE: 0.2%
  POSITIVE: 99.8%
Sentence: I hate this so much!
  NEGATIVE: 99.9%
  POSITIVE: 0.1%
