In [None]:
# library dependencies
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

## Text encoding:
### Convert strings to an array of numbers that can be fed as input to a model

In [None]:
# Load the tokenizer that belongs to the "roberta-base" checkpoint.
# It is used in the next cells to turn raw text into tokens and then into ids.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path="roberta-base")

In [None]:
# Step 1: tokenization - break the sentence into the pieces ("words") the tokenizer knows
text = "I would like to go fishing."
word_pieces = tokenizer.tokenize(text)
print(word_pieces)

# Try it: uncomment the two lines below to tokenize a sentence containing an
# unusual word ("PyMNtos") and compare how it gets split.
# text = "I would like to go to the PyMNtos meet-up."
# print(tokenizer.tokenize(text))

In [None]:
# Step 2: look up the integer id of every token
tokens = tokenizer.tokenize(text)
token_ids = tokenizer.convert_tokens_to_ids(tokens)
print(token_ids)

# Optional sanity check (uncomment): the two lengths are expected to match,
# i.e. one id per token.
# print(len(tokens))
# print(len(tokenizer.convert_tokens_to_ids(tokens)))

# Sentiment Analysis Demo
## Using a pre-trained transformer model and the Hugging Face transformers library

### Load a pre-trained [tokenizer and model for sentiment analysis](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment) from the [Hugging Face model repository](https://huggingface.co/models)

In [None]:
# Identifier of the sentiment-analysis checkpoint; the tokenizer and the
# classification model are both loaded from this same name.
SENTIMENT_MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"

# NOTE: this rebinds `tokenizer`, replacing the "roberta-base" tokenizer that
# was loaded in the text-encoding section above.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=SENTIMENT_MODEL_NAME
)
model = AutoModelForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path=SENTIMENT_MODEL_NAME
)

### Run inference and examine the output

In [None]:
# Sentence to classify. Swap in one of the commented alternatives to see how
# the scores change.
text = "I am so happy that I joined the PyMNtos group."
# text = "Really disappointed in the PyMNTos talk that Alex Cunliffe gave - so boring!"
# text = "When is the next meet-up?"

# Encode the sentence; return_tensors="pt" asks for PyTorch tensors.
tokenized_text = tokenizer(text=text, return_tensors="pt")

# Forward pass only: gradient tracking is disabled because nothing is trained here.
with torch.no_grad():
    out = model(**tokenized_text)

# logits[0] is the entry for the first (here: only) input sentence
# (assumes logits are laid out as (batch, classes) - TODO confirm).
print("out:", out.logits[0])

### Apply softmax activation
 - commonly used as the final activation in multi-class classification
 - converts the raw model outputs (logits) into probabilities between 0 and 1
 - the softmax outputs sum to 1

In [None]:
# Convert the logits of the single input sentence into probabilities.
# out.logits[0] is indexed along axis 0 here, hence dim=0.
softmax_result = torch.softmax(out.logits[0], dim=0)

# Human-readable names for class indices 0, 1 and 2, in that order.
sentiment_labels = ["Negative", "Neutral", "Positive"]
# Index of the highest probability = predicted class.
best_index = int(torch.argmax(softmax_result))

print("Text:", text)
print("Softmax result:", softmax_result)
print("Predicted sentiment:", sentiment_labels[best_index])