In [1]:
# This is a check to see if pytorch is using NVIDIA GPU
# install pytorch using pip install torch  --index-url https://download.pytorch.org/whl/cu118

import torch

print(torch.cuda.is_available())

print(torch.__version__)


True
2.6.0+cu118


In [2]:
from transformers import pipeline

# transformers documentation: https://huggingface.co/transformers/v4.10.1/main_classes/pipelines.html

# create a classifier object for sentiment analysis

classifier = pipeline(task="sentiment-analysis", framework="pt") 

# simple non-ambiguous examples to begin with
classifier_input = [
    "I love the new movie",
    "I hate the new movie",
    "I kinda do not know how I feel about the new movie", # ambiguous example
    "I am not sure how I feel about the new movie", # ambiguous example
]

# call classifier with input

classifier_output = classifier(classifier_input)

print(classifier_output)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cuda:0


[{'label': 'POSITIVE', 'score': 0.9998722076416016}, {'label': 'NEGATIVE', 'score': 0.999622106552124}, {'label': 'NEGATIVE', 'score': 0.9984796643257141}, {'label': 'NEGATIVE', 'score': 0.9985328912734985}]


In [3]:
from transformers import AutoTokenizer

# TODO: study this particular checkpoint

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

# load tokenizer
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# print(tokenizer)

inputs = tokenizer(classifier_input, padding=True, truncation=True, return_tensors="pt")
print(inputs)

{'input_ids': tensor([[  101,  1045,  2293,  1996,  2047,  3185,   102,     0,     0,     0,
             0,     0,     0,     0],
        [  101,  1045,  5223,  1996,  2047,  3185,   102,     0,     0,     0,
             0,     0,     0,     0],
        [  101,  1045, 17704,  2079,  2025,  2113,  2129,  1045,  2514,  2055,
          1996,  2047,  3185,   102],
        [  101,  1045,  2572,  2025,  2469,  2129,  1045,  2514,  2055,  1996,
          2047,  3185,   102,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]])}


In [9]:
from transformers import AutoModel

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

# load model
model = AutoModel.from_pretrained(checkpoint)

# run the model with tokenized inputs to get the output
outputs = model(**inputs)
print(outputs.last_hidden_state.shape)


torch.Size([4, 14, 768])


In [6]:
from transformers import AutoModelForSequenceClassification

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
outputs = model(**inputs)

print(outputs.logits.shape)
print(outputs.logits)


torch.Size([4, 2])
tensor([[-4.3119,  4.6533],
        [ 4.3538, -3.5269],
        [ 3.5596, -2.9277],
        [ 3.5888, -2.9343]], grad_fn=<AddmmBackward0>)


In [8]:
import torch
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

print(predictions)

print(model.config.id2label)

tensor([[1.2777e-04, 9.9987e-01],
        [9.9962e-01, 3.7784e-04],
        [9.9848e-01, 1.5203e-03],
        [9.9853e-01, 1.4671e-03]], grad_fn=<SoftmaxBackward0>)
{0: 'NEGATIVE', 1: 'POSITIVE'}
