In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_metric, load_dataset
import torch

# Load the trained model and tokenizer
model_path = "./fine_tuned_lora_model_summary"  # Replace with your model path
tokenizer_path = "./fine_tuned_lora_tokenizer_summary"  # Replace with your tokenizer path

# Use the GPU when one is visible, otherwise fall back to CPU so the
# notebook does not crash at load time on a machine without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

# Load the dataset
dataset = load_dataset("cnn_dailymail", "3.0.0", split="test[:500]")  # Use a subset for faster evaluation

# Metrics
# NOTE(review): `datasets.load_metric` is deprecated and absent from recent
# `datasets` releases; the replacement lives in the separate `evaluate`
# package, whose ROUGE result has a different structure — confirm the pinned
# `datasets` version before upgrading.
rouge = load_metric("rouge")

In [None]:
from nltk.translate.bleu_score import sentence_bleu
# Function to generate summaries
def generate_summary(input_text, max_length=128, min_length=30):
    """Generate a summary for ``input_text`` with the fine-tuned causal LM.

    The model is decoder-only, so ``generate`` returns the prompt tokens
    followed by the continuation. Only the continuation is decoded and
    returned; otherwise the "summary" would contain the article itself.

    Args:
        input_text: Text used as the prompt. It is tokenized and truncated
            to at most 512 tokens.
        max_length: Maximum number of *newly generated* tokens (the prompt
            is not counted against this budget).
        min_length: Minimum number of *newly generated* tokens.

    Returns:
        The generated continuation decoded to a string, with special
        tokens removed.
    """
    encoded = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
    # Follow the model's own device rather than assuming CUDA.
    input_ids = encoded["input_ids"].to(model.device)
    attention_mask = encoded["attention_mask"].to(model.device)
    prompt_length = input_ids.shape[-1]

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        # *_new_tokens limits apply to the continuation only; plain
        # max_length/min_length would include the (up to 512-token) prompt
        # and leave no room to generate.
        max_new_tokens=max_length,
        min_new_tokens=min_length,
        length_penalty=2.0,
        num_beams=4,
    )
    # Drop the echoed prompt before decoding.
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

In [None]:
# Evaluation
from nltk.translate.bleu_score import SmoothingFunction

rouge_scores = []
bleu_scores = []

# Unsmoothed sentence-level BLEU collapses to ~0 (and emits warnings) as soon
# as one n-gram order has no overlap, which is common for short abstractive
# summaries. Smoothing keeps the per-example score informative. Built once,
# outside the loop.
bleu_smoothing = SmoothingFunction().method1

print("Evaluating the model...")
for data in dataset:
    article = data["article"]
    reference_summary = data["highlights"]

    # Generate the summary
    generated_summary = generate_summary(article)

    # Calculate ROUGE scores (one prediction/reference pair per call, so each
    # entry of rouge_scores holds the scores for a single example)
    rouge_result = rouge.compute(
        predictions=[generated_summary],
        references=[reference_summary]
    )
    rouge_scores.append(rouge_result)

    # Calculate BLEU score on whitespace tokens; sentence_bleu expects a list
    # of reference token lists and a single hypothesis token list
    reference_tokens = [reference_summary.split()]
    generated_tokens = generated_summary.split()
    bleu_score = sentence_bleu(
        reference_tokens, generated_tokens, smoothing_function=bleu_smoothing
    )
    bleu_scores.append(bleu_score)

In [None]:

# Collapse the per-example metrics into corpus-level means.
num_rouge_examples = len(rouge_scores)
average_rouge = {}
for rouge_type in rouge_scores[0]:
    # Mid-point F-measure of this ROUGE variant for every evaluated example.
    f_measures = (
        example_scores[rouge_type].mid.fmeasure for example_scores in rouge_scores
    )
    average_rouge[rouge_type] = sum(f_measures) / num_rouge_examples

bleu_total = sum(bleu_scores)
average_bleu = bleu_total / len(bleu_scores)

# Report the averages, four decimal places each.
print("\nAverage ROUGE Scores:")
for key, value in average_rouge.items():
    print(f"{key.upper()}: {value:.4f}")

print(f"\nAverage BLEU Score: {average_bleu:.4f}")