In [6]:
print("Hello, starting...")
# Installation of Required Libraries
!pip install transformers datasets torch tensorflow-macos tensorflow-metal 

import torch
import time
import tensorflow as tf
import os

# Check if MPS (Metal Performance Shaders) is available
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")

print("Library installed.")

if torch.backends.mps.is_available():
    print("MPS backend is available!")
else:
    print("MPS backend is not available.")

devices = tf.config.experimental.list_physical_devices('GPU')
print("Devices: ", devices)
    

Hello, starting...
Library installed.
MPS backend is available!
Devices:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [7]:
import os

#os.environ["PYTORCH_ENABLE_MPS_FALLBACK"]="1"
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"]="0.0"

In [8]:
# Check if MPS is available and set the device
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

print("Device: ", device)

Device:  mps


In [None]:
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, load_metric, DatasetDict
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support
import matplotlib.pyplot as plt
import itertools

# Ensure MPS is used on a MacBook M3 if available
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

# Load the IMDB dataset
dataset = load_dataset("imdb")

# Split dataset into train, validation, and test sets
train_val_split = dataset['train'].train_test_split(test_size=0.1)
test_split = dataset['test'].train_test_split(test_size=0.1)
train_data, val_data = train_val_split['train'], train_val_split['test']
test_data = test_split['test']

# Create a DatasetDict to work with the Trainer API
dataset_dict = DatasetDict({
    'train': train_data,
    'validation': val_data,
    'test': test_data
})

# Initialize the tokenizer and model
tokenizer = RobertaTokenizer.from_pretrained("roberta-large")
model = RobertaForSequenceClassification.from_pretrained("roberta-large")
model.to(device)

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['text'], padding="max_length", truncation=True)

tokenized_datasets = dataset_dict.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

# Define metrics
accuracy_metric = load_metric("accuracy")
f1_metric = load_metric("f1")

def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    labels = p.label_ids
    acc = accuracy_metric.compute(predictions=preds, references=labels)["accuracy"]
    f1 = f1_metric.compute(predictions=preds, references=labels, average="weighted")["f1"]
    return {"accuracy": acc, "f1": f1}

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    warmup_steps=500,
    gradient_accumulation_steps=4,
    logging_dir="./logs",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    report_to="none",
    save_total_limit=1,
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    compute_metrics=compute_metrics,
)

# Train and evaluate the model
trainer.train()
trainer.evaluate()

# Save the model to the Hugging Face Hub
model.push_to_hub("finetuned-roberta-large-imdb")

# Function to plot confusion matrix
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

# Get predictions
predictions = trainer.predict(tokenized_datasets["test"])
preds = np.argmax(predictions.predictions, axis=1)

# Compute confusion matrix
cm = confusion_matrix(tokenized_datasets["test"]["labels"], preds)
plot_confusion_matrix(cm, classes=["Negative", "Positive"])
plt.show()

# Print accuracy, precision, recall, f1 score
accuracy = accuracy_score(tokenized_datasets["test"]["labels"], preds)
precision, recall, f1, _ = precision_recall_fscore_support(tokenized_datasets["test"]["labels"], preds, average='weighted')
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Print few sample false positives and false negatives
false_positives = np.where((preds == 1) & (tokenized_datasets["test"]["labels"] == 0))[0]
false_negatives = np.where((preds == 0) & (tokenized_datasets["test"]["labels"] == 1))[0]

print("Sample False Positives:")
for idx in false_positives[:5]:
    print(tokenized_datasets["test"][idx])

print("Sample False Negatives:")
for idx in false_negatives[:5]:
    print(tokenized_datasets["test"][idx])

# Inference function
def predict(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    prediction = torch.argmax(logits, dim=-1).item()
    return "Positive" if prediction == 1 else "Negative"

# Download model for inference
model = RobertaForSequenceClassification.from_pretrained("jigarcpatel/finetuned-roberta-large-imdb").to(device)

# Sample inference
texts = ["This movie is fantastic!", "I hated every moment of this film."]
for text in texts:
    print(f"Text: {text} | Sentiment: {predict(text)}")


Using device: mps


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/22500 [00:00<?, ? examples/s]

Map:   0%|          | 0/2500 [00:00<?, ? examples/s]

Map:   0%|          | 0/2500 [00:00<?, ? examples/s]

  accuracy_metric = load_metric("accuracy")


Epoch,Training Loss,Validation Loss,Accuracy,F1
0,2104995.328,,0.5072,0.341364
