# In [None]:
import torch
from transformers import RobertaTokenizer, RobertaForTokenClassification
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# Restore the fine-tuned tokenizer and token-classification model.
# Both are read from the same directory.
model_path = "../models/fine_tuned_roberta"
tokenizer = RobertaTokenizer.from_pretrained(model_path)
model = RobertaForTokenClassification.from_pretrained(model_path)

# Read the evaluation CSV and pull its 'text' and 'labels' columns out
# as plain Python lists (one entry per row).
data_path = "../data/test_data.csv"
df_test = pd.read_csv(data_path)
test_inputs, test_labels = (
    df_test[column].tolist() for column in ('text', 'labels')
)

# Encode every test text in one tokenizer call, with padding and truncation
# enabled, and get the result back as PyTorch tensors.
tokenized_inputs = tokenizer(
    test_inputs,
    truncation=True,
    padding=True,
    return_tensors="pt",
)
attention_mask = tokenized_inputs['attention_mask']

# Wrap (input_ids, attention_mask) pairs in a dataset and iterate over it
# in its original order, batch_size examples at a time.
batch_size = 8
test_data = TensorDataset(tokenized_inputs['input_ids'], attention_mask)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(
    test_data,
    batch_size=batch_size,
    sampler=test_sampler,
)

# Make predictions on test set.
# Pick the device here so this step does not depend on a `device` variable
# defined elsewhere, and move the model to it so that the weights and the
# input batches live on the same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# NOTE(review): predictions are kept for every position of every padded
# sequence (padding=True is used when tokenizing), so they presumably need
# to be filtered/aligned with the gold labels before scoring - confirm.
predictions = []
with torch.no_grad():  # inference only: no gradients needed
    for batch in test_dataloader:
        batch = tuple(t.to(device) for t in batch)
        inputs = {'input_ids': batch[0], 'attention_mask': batch[1]}
        outputs = model(**inputs)
        # No labels are passed, so the first output element is the logits.
        logits = outputs[0]
        # Highest-scoring class id per position, as nested Python int lists.
        predictions.extend(torch.argmax(logits, dim=2).cpu().tolist())

# Convert predicted class ids to label names.
try:
    # Invert the label -> id mapping when one is available in the session.
    idx2label = {i: label for label, i in labels_dict.items()}
except NameError:
    # labels_dict is not defined in this file; fall back to the id -> label
    # mapping carried by the loaded model's config.
    # NOTE(review): assumes fine-tuning saved the real label names in
    # config.id2label (otherwise they are generic placeholders) - confirm.
    idx2label = model.config.id2label
predicted_labels = [[idx2label[p] for p in prediction] for prediction in predictions]

# Build the per-class precision/recall/F1 summary, then display it.
# NOTE(review): predicted_labels is a list of per-sequence label lists, while
# sklearn's classification_report is documented for 1-D label arrays - confirm
# whether both sides must be flattened/aligned (or a sequence-level metric
# used) before this call.
report = classification_report(test_labels, predicted_labels)
print(report)
