<a href="https://colab.research.google.com/github/bijayabc/mBERT-finetuning/blob/main/BERT_Baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install transformers datasets lightning

In [None]:
from datasets import load_dataset
import lightning as L
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
import torch

In [None]:
# Load the Nepali sentiment-analysis dataset by its Hugging Face identifier.
dataset = load_dataset('IRIIS-RESEARCH/Sentiment-Analysis-Nepali')
# Print the loaded object to inspect its contents before a split is selected.
print(dataset)

In [None]:
# Keep only the 'test' split; it is tokenized and evaluated in the cells below.
test_dataset_raw = dataset['test']

In [None]:
# Load the tokenizer for the multilingual BERT checkpoint.
tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-cased')


def tokenize_function(examples):
    """Tokenize one batch of dataset rows.

    Args:
        examples: Batch mapping handed over by ``map(..., batched=True)``;
            its 'sentences' entry is iterated as the raw texts.

    Returns:
        The tokenizer output for the batch, with every sequence padded or
        truncated to a fixed length of 128 tokens.
    """
    # Missing entries become empty strings and everything else is coerced to
    # ``str`` so the tokenizer only ever receives text.
    cleaned = ["" if text is None else str(text) for text in examples['sentences']]
    return tokenizer(
        cleaned,
        padding='max_length',
        truncation=True,
        max_length=128,
    )


# Tokenize the test split batch-wise, then have it return torch tensors for
# just the columns the evaluation needs.
test_dataset = test_dataset_raw.map(tokenize_function, batched=True)
test_dataset.set_format(
    'torch',
    columns=['input_ids', 'attention_mask', 'sentiment'],
)

In [None]:
import os

from torch.utils.data import DataLoader, Dataset

# Never ask for more worker processes than the host has CPUs: a fixed 12
# oversubscribes small machines (PyTorch warns about it and loading can get
# slower instead of faster). Larger hosts still get up to 12 workers.
num_workers = min(12, os.cpu_count() or 1)

# Evaluation loader over the tokenized test split.
# - shuffle=False: the order of batches has no effect on the aggregated
#   accuracy, and a fixed order keeps evaluation runs reproducible.
# - pin_memory only helps host-to-GPU copies, so enable it only when CUDA is
#   available (this also avoids a warning on CPU-only machines).
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=torch.cuda.is_available(),
)

# Smoke test: pull a single batch to confirm the expected keys are present.
# Stopping after the first batch avoids a full, discarded pass over the data.
for batch in test_loader:
    input_ids = batch['input_ids']
    attention_mask = batch['attention_mask']
    labels = batch['sentiment']
    # feed these into your model
    break

In [None]:
# A single checkpoint name drives both the tokenizer and the classifier.
model_name = "bert-base-multilingual-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Sequence-classification model built from the checkpoint with 3 output labels.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
)

# Evaluation mode matters here: it switches dropout off.
model.eval()

In [None]:
from torchmetrics import Accuracy

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Put the model on the chosen device and make sure it is in evaluation mode.
model.to(device)
model.eval()

# The metric keeps its running state on the same device as the predictions.
acc = Accuracy(task="multiclass", num_classes=3).to(device)

# Gradients are never needed for evaluation, so disable them for the whole loop.
with torch.no_grad():
    for batch in test_loader:
        # Move the three tensors used below onto the evaluation device.
        input_ids, attention_mask, labels = (
            batch[key].to(device)
            for key in ("input_ids", "attention_mask", "sentiment")
        )

        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        # Predicted class = index of the largest logit along dim 1.
        preds = logits.argmax(dim=1)
        acc.update(preds, labels)

# Aggregate over every batch seen and report a plain Python float.
# NOTE(review): the model is loaded from the base checkpoint without any
# fine-tuning in this notebook, so "zero-shot" here presumably means an
# untrained classification head - confirm the label is the intended wording.
baseline_accuracy = acc.compute().item()
print(f"Baseline mBERT accuracy (zero-shot): {baseline_accuracy:.4f}")
