# Fine-Tuned RoBERTa Model

In [1]:
from transformers import RobertaForQuestionAnswering, RobertaTokenizer
from datasets import load_dataset, load_metric
import torch
from transformers import TrainingArguments, Trainer
from accelerate import Accelerator

In [2]:
train_data_path = '../data/roberta_train.pt'
val_data_path = '../data/roberta_val.pt'
test_data_path = '../data/roberta_test.pt'

In [3]:
train_data = torch.load('../data/roberta_train.pt')
val_data = torch.load('../data/roberta_val.pt')
test_data = torch.load('../data/roberta_test.pt')

In [4]:
print(f'The number of rows in input_ids: {len(train_data["input_ids"])}')
print(f'The number of rows in attention_mask: {len(train_data["attention_mask"])}')
print(f'The number of rows in start_positions: {len(train_data["start_positions"])}')
print(f'The number of rows in end_positions: {len(train_data["end_positions"])}')
print(f'The number of rows in answer_mask: {len(train_data["answer_mask"])}')

The number of rows in input_ids: 67275
The number of rows in attention_mask: 67275
The number of rows in start_positions: 67275
The number of rows in end_positions: 67275
The number of rows in answer_mask: 67275


In [18]:
test_train_data = {'input_ids': train_data['input_ids'],
                   'attention_mask': train_data['attention_mask'],
                   'start_positions': train_data['start_positions'],
                   'end_positions': train_data['end_positions']}

test_val_data = {'input_ids': val_data['input_ids'],
                 'attention_mask': val_data['attention_mask'],
                 'start_positions': val_data['start_positions'],
                 'end_positions': val_data['end_positions']}

In [11]:
model_name = "roberta-base"
tokenizer = RobertaTokenizer.from_pretrained(model_name)
model = RobertaForQuestionAnswering.from_pretrained(model_name)

Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
# Define training arguments (optimizer, learning rate etc.)
training_args = TrainingArguments(
    output_dir="../models",  # Replace with your output directory
    per_device_train_batch_size=75,
    save_steps=500,
    num_train_epochs=3,
    learning_rate=2e-5,
)

In [13]:
def compute_metrics(pred):
    """Calculates F1 score, ROUGE metrics, and accuracy for Roberta predictions.
    
    Args:
    pred: A dictionary containing the model's predictions (answers) 
         and ground truth labels (references).
    
    Returns:
    A dictionary containing F1 score, ROUGE metrics (precision, recall, f1),
    and accuracy.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    
    # Load Rouge metric
    rouge = load_metric("rouge")
    
    # Compute Rouge scores
    rouge_outputs = rouge.compute(predictions=preds, references=labels, rouge_types=["rougeL"])
    rouge_score = rouge_outputs["rougeL"]
    
    # Compute F1 score (you can use sklearn or other libraries)
    from sklearn.metrics import f1_score
    f1 = f1_score(labels, preds, average="weighted")
    
    # Calculate accuracy
    accuracy = (labels == preds).mean()  # Calculate proportion of correct predictions
    
    # Return dictionary with metrics
    return {"f1": f1, "rougeL": rouge_score["precision"], "rougeL_recall": rougeL["recall"], "rougeL_f1": rougeL["f1"], "accuracy": accuracy}

In [19]:
# Create trainer object
accelerator = Accelerator()

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=test_train_data,
    eval_dataset=test_val_data,
    compute_metrics=compute_metrics
)

In [20]:
# Start training
trainer.train()

KeyError: 2

In [None]:
# Save the fine-tuned model
model.save_pretrained("../models/fine-tuned-roberta")

In [None]:
def predict(context, question, tokenizer, model):
    """Uses the fine-tuned model to predict the answer based on context and question.
    
    Args:
      context: The passage or document containing relevant information.
      question: The question to be answered.
      tokenizer: The tokenizer used to process text into model inputs.
      model: The fine-tuned question answering model.
    
    Returns:
      answer: The predicted answer string from the context.
    """
    
    # Preprocess context and question (replace with your model's specific requirements)
    inputs = tokenizer(context, question, return_tensors="pt")  # Example using PyTorch tensors
    
    # Make predictions using your model (replace with your model's forward pass logic)
    with torch.no_grad():  # Disable gradient calculation for efficiency
        outputs = model(**inputs)
        start_scores, end_scores = outputs.start_logits, outputs.end_logits
    
    # Extract predicted answer based on start and end scores
    answer_start = torch.argmax(start_scores)
    answer_end = torch.argmax(end_scores) + 1  # Account for inclusive end index
    answer = tokenizer.convert_tokens_to_strings(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end]))[0]
    
    return answer

In [None]:
# Test
context = "This is a sample context for question answering."
question = "What is the context about?"
answer = predict(context, question)
print(f"Predicted Answer: {answer}")