In [None]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer
from datasets import load_dataset
from evaluate import load
import numpy as np
import re
import gc
# Fix PyTorch's global RNG seed so repeated runs start from the same state.
torch.manual_seed(seed=42)

# ROUGE scorer from `evaluate`; generate_summaries() reads this module-level name.
metric = load("rouge")

In [None]:
def load_model_and_tokenizer(model_path):
    """Load a BART model and its tokenizer from the same location.

    Args:
        model_path: Identifier or path handed unchanged to both
            ``from_pretrained`` calls.

    Returns:
        tuple: ``(model, tokenizer)``, with the model loaded first.
    """
    # Tuple elements are evaluated left to right, so the model is still
    # loaded before the tokenizer.
    return (
        BartForConditionalGeneration.from_pretrained(model_path),
        BartTokenizer.from_pretrained(model_path),
    )


def generate_summaries(model, tokenizer, dataset, num_examples=None, max_source_length=512):
    """Summarize dialogues with ``model`` and score them against the references.

    Each selected example's dialogue is tokenized (truncated to
    ``max_source_length`` tokens), summarized with beam search on the CPU and
    printed next to its reference summary. Once all examples are processed,
    the module-level ``metric`` object scores the predictions against the
    references; the scores are printed and returned.

    Args:
        model: Model providing ``eval()``, ``to()``, ``device`` and
            ``generate()``. It is moved to the CPU as a side effect.
        tokenizer: Tokenizer providing ``encode()`` and ``decode()``.
        dataset: Integer-indexable collection supporting ``len()`` whose items
            have ``"dialogue"`` and ``"summary"`` entries.
        num_examples: Number of leading examples to process. ``None`` processes
            everything; values above ``len(dataset)`` are capped to it.
        max_source_length: Maximum number of input tokens kept when encoding a
            dialogue.

    Returns:
        dict: Metric name mapped to its score multiplied by 100.
    """
    model.eval()
    model.to("cpu")

    # Cap the request at the dataset size so an oversized num_examples does not
    # index past the end.
    available = len(dataset)
    num_examples = available if num_examples is None else min(num_examples, available)

    # Decoding settings are the same for every example, so build them once.
    # Beam search only; sampling (do_sample / top_k / top_p / temperature) is
    # intentionally not enabled.
    gen_kwargs = {
        "max_length": 90,
        "min_length": 1,
        "num_beams": 6,
        "no_repeat_ngram_size": 2,
        "length_penalty": 1.6,
    }

    all_decoded_preds = []
    all_decoded_labels = []

    for i in range(num_examples):
        # Fetch the row once and reuse it for both the input and the reference.
        example = dataset[i]
        input_ids = tokenizer.encode(
            example["dialogue"],
            return_tensors="pt",
            max_length=max_source_length,  # honour the caller's limit
            truncation=True,
        ).to(model.device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            generated_ids = model.generate(input_ids=input_ids, **gen_kwargs)

        generated_summary = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        reference_summary = example["summary"]

        all_decoded_preds.append(generated_summary)
        all_decoded_labels.append(reference_summary)

        print(f"Dialog: {i + 1}")
        print(f"Generated Summary:\n{generated_summary}")
        print(f"Reference Summary:\n{reference_summary}")
        print("-" * 50)

    # Score all predictions at once, after generation has finished.
    result = metric.compute(predictions=all_decoded_preds, references=all_decoded_labels, use_stemmer=True)
    result = {key: value * 100 for key, value in result.items()}  # Convert to percentage

    print("ROUGE Scores:")
    for key, value in result.items():
        print(f"{key}: {value:.2f}")

    # Hand the scores back so callers can use them, not just read the printout.
    return result


In [None]:
# Load the DialogSum dataset and keep its "test" split for evaluation.
dataset = load_dataset("knkarthick/dialogsum")
test_dataset = dataset["test"]

# Name both candidate checkpoints instead of assigning `model_checkpoint` twice
# in a row (the first value was always overwritten). The base model is the one
# evaluated; switch to FINETUNED_CHECKPOINT to score the local fine-tuned run.
FINETUNED_CHECKPOINT = "./finetuned_bart_dialogsum/checkpoint-2925"
BASE_CHECKPOINT = "facebook/bart-large"
model_checkpoint = BASE_CHECKPOINT

model, tokenizer = load_model_and_tokenizer(model_checkpoint)

# num_examples=3 limits the run to the first 3 test examples; pass None to
# process the whole split.
generate_summaries(model, tokenizer, test_dataset, num_examples=3)


In [None]:
# Drop the notebook's references to the model and tokenizer so they can be freed.
del model
del tokenizer

# Collect garbage first: objects kept alive only by reference cycles are
# released here, and only memory that is already free can be returned by
# empty_cache() afterwards.
gc.collect()

# Releasing cached GPU memory only makes sense when CUDA is present.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
