In [None]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import torch
from datasets import load_dataset
from tqdm import tqdm
from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset

In [None]:
def evaluate_performance(y_true, y_pred):
    """Print the confusion matrix and classification report for binary review predictions.

    Args:
        y_true: Ground-truth labels, encoded as 0 (negative) / 1 (positive).
        y_pred: Predicted labels, same encoding and length as ``y_true``.

    Returns:
        None. Both tables are printed to stdout.
    """

    from sklearn.metrics import confusion_matrix, classification_report

    # Pin the label set explicitly. Without it scikit-learn infers the classes
    # from the data, so a run in which only one class appears would yield a
    # 1x1 confusion matrix and make classification_report raise a ValueError
    # because the two target names no longer match the inferred classes.
    class_ids = [0, 1]
    class_names = ["Negative Review", "Positive Review"]

    print(confusion_matrix(y_true, y_pred, labels=class_ids))

    performance = classification_report(
        y_true, y_pred,
        labels=class_ids,
        target_names=class_names
    )
    print(performance)

In [None]:
# Load the Rotten Tomatoes review dataset
data = load_dataset("rotten_tomatoes")

# Work with the held-out test split for the previews below
test_split = data["test"]
print(test_split.shape)

print("First 5 reviews + labels:")
for idx in range(5):
    print(test_split[idx])

# Walk backwards from the final row, so the very last review is printed first
print("Last 5 reviews + labels:")
for idx in range(-1, -6, -1):
    print(test_split[idx])

In [None]:
# Path to our HF model
model = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Pick the best accelerator available on this machine instead of hard-coding
# Apple's MPS backend, so the notebook also runs on CUDA machines and on
# CPU-only hosts. MPS is still preferred whenever it is present.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load model into pipeline
pipe = pipeline(
    model=model,
    tokenizer=model,
    # Return a score for every label rather than only the top one; the
    # inference step reads the negative and positive scores from this list.
    return_all_scores=True,
    device=device
)

In [None]:
# Run inference
# Map each label name back to its class id so scores are looked up by class
# rather than by their position in the pipeline output. Positional indexing
# silently picks the wrong scores if the pipeline returns them in a different
# order (e.g. sorted by confidence).
label_to_id = {label: idx for idx, label in pipe.model.config.id2label.items()}

y_pred = []
for output in tqdm(pipe(KeyDataset(data["test"], "text")), total=len(data["test"])):
    score_by_id = {label_to_id[item["label"]]: item["score"] for item in output}
    negative_score = score_by_id[0]
    positive_score = score_by_id[2]
    # Class id 1 (presumably the neutral class) is skipped on purpose: only the
    # negative and positive scores are compared to produce a binary label.
    # int() stores plain Python ints instead of NumPy scalars.
    assignment = int(np.argmax([negative_score, positive_score]))
    y_pred.append(assignment)

In [None]:
# Score the predictions against the ground-truth labels of the test split
evaluate_performance(y_true=data["test"]["label"], y_pred=y_pred)

In [None]:
# Peek at the first five predicted labels
print(y_pred[:5])

In [None]:
# Peek at the final five predicted labels
last_predictions = y_pred[-5:]
print(last_predictions)