In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load one tokenizer/model pair per Flan-T5 checkpoint size.
# Each pair is built left to right: tokenizer first, then the seq2seq model.
tokenizer_base, model_base = (
    AutoTokenizer.from_pretrained("google/flan-t5-base"),
    AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base"),
)

tokenizer_large, model_large = (
    AutoTokenizer.from_pretrained("google/flan-t5-large"),
    AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large"),
)


In [None]:
import torch
# Pick the compute device: the GPU when PyTorch reports CUDA, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Ask PyTorch to release any cached-but-unused CUDA memory before the heavy work.
torch.cuda.empty_cache()

In [None]:
import pandas as pd
import os
# Length cap handed to `model.generate(max_length=...)` in generate_title.
MAX_TARGET_LENGTH = 8

# Evaluation data, read from the current working directory.
test_df = pd.read_csv(filepath_or_buffer='test.csv')

In [None]:
# Prompt Variation 1
def prompt_v1(text):
    """Return `text` prefixed with the "generate a title" instruction line."""
    template = "Generate a title for the following article:\n{}"
    return template.format(text)

# Prompt Variation 2
def prompt_v2(text):
    """Return `text` prefixed with the "concise headline" instruction line."""
    instruction = "Write a concise headline for this news article:"
    return "{}\n{}".format(instruction, text)

In [None]:

def generate_title(prompted_texts, tokenizer, model, beam=True, batch_size=4):
    """Generate one title per prompt, processing the prompts in batches.

    Args:
        prompted_texts: Iterable of prompt strings (list, tuple, pandas
            Series, ...). It is copied into a list up front.
        tokenizer: Tokenizer paired with ``model``; called on each batch and
            used to decode the generated ids.
        model: Model exposing ``.to`` and ``.generate``. It is moved to the
            module-level ``device`` before generation.
        beam: If True, decode with 5 beams; otherwise with a single beam.
        batch_size: Number of prompts tokenized and generated per step.

    Returns:
        list: Decoded titles (special tokens stripped), in input order.
        Output length is capped by the module-level ``MAX_TARGET_LENGTH``.
    """
    # Materialise once so every batch handed to the tokenizer is a plain list
    # of strings, even when the caller passes a tuple or a pandas Series.
    prompted_texts = list(prompted_texts)
    model = model.to(device)
    generated_titles = []

    for start in range(0, len(prompted_texts), batch_size):
        batch = prompted_texts[start:start + batch_size]
        inputs = tokenizer(batch, return_tensors='pt', padding=True, truncation=True).to(device)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            outputs = model.generate(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_length=MAX_TARGET_LENGTH,
                num_beams=5 if beam else 1,
                # early_stopping is a beam-search option; only request it when
                # beams are in use so single-beam decoding does not pass a
                # flag that has no effect there.
                early_stopping=bool(beam),
            )

        generated_titles.extend(tokenizer.batch_decode(outputs, skip_special_tokens=True))

    return generated_titles



In [None]:
# Build the prompted inputs once per prompt template
test_texts = test_df["text"].tolist()
prompted_v1 = list(map(prompt_v1, test_texts))
prompted_v2 = list(map(prompt_v2, test_texts))

# One row per (output column, prompts, tokenizer, model) run.
# Row order determines the order in which columns are added to test_df.
title_runs = (
    ("flan_base_title_v1", prompted_v1, tokenizer_base, model_base),
    ("flan_base_title_v2", prompted_v2, tokenizer_base, model_base),
    ("flan_large_title_v1", prompted_v1, tokenizer_large, model_large),
    ("flan_large_title_v2", prompted_v2, tokenizer_large, model_large),
)
for title_column, run_prompts, run_tokenizer, run_model in title_runs:
    test_df[title_column] = generate_title(run_prompts, run_tokenizer, run_model)

In [None]:
from rouge_score import rouge_scorer

def evaluate_rouge(preds, refs):
    """Average ROUGE-1, ROUGE-2 and ROUGE-L F-measures over aligned pairs.

    Args:
        preds: Iterable of predicted strings.
        refs: Iterable of reference strings, aligned one-to-one with ``preds``.

    Returns:
        tuple: ``(avg_rouge1, avg_rouge2, avg_rougeL)`` mean F-measures.
        ``(0.0, 0.0, 0.0)`` when there are no pairs to score.

    Raises:
        ValueError: If ``preds`` and ``refs`` have different lengths.
    """
    preds, refs = list(preds), list(refs)

    # zip() would silently drop the unmatched tail and skew the averages.
    if len(preds) != len(refs):
        raise ValueError(
            f"preds and refs must have the same length, got {len(preds)} and {len(refs)}"
        )
    # Nothing to score: avoid dividing by zero below.
    if not preds:
        return 0.0, 0.0, 0.0

    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    # The scorer is called with the reference first, then the prediction.
    scores = [scorer.score(ref, pred) for ref, pred in zip(refs, preds)]
    count = len(scores)

    avg_rouge1 = sum(s['rouge1'].fmeasure for s in scores) / count
    avg_rouge2 = sum(s['rouge2'].fmeasure for s in scores) / count
    avg_rougeL = sum(s['rougeL'].fmeasure for s in scores) / count

    return avg_rouge1, avg_rouge2, avg_rougeL


In [None]:
# Score each generated-title column against the reference titles.
a1, a2, al = evaluate_rouge(test_df["flan_base_title_v1"], test_df["title"])
b1, b2, bl = evaluate_rouge(test_df["flan_base_title_v2"], test_df["title"])
c1, c2, cl = evaluate_rouge(test_df["flan_large_title_v1"], test_df["title"])
d1, d2, dl = evaluate_rouge(test_df["flan_large_title_v2"], test_df["title"])

# Each label names the model/prompt of the column scored above
# (a* = base/v1, b* = base/v2, c* = large/v1, d* = large/v2).
print("Flan-T5 Base Prompt V1 - ROUGE-1:", a1, "ROUGE-2:", a2, "ROUGE-L:", al)
print("Flan-T5 Base Prompt V2 - ROUGE-1:", b1, "ROUGE-2:", b2, "ROUGE-L:", bl)
print("Flan-T5 Large Prompt V1 - ROUGE-1:", c1, "ROUGE-2:", c2, "ROUGE-L:", cl)
print("Flan-T5 Large Prompt V2 - ROUGE-1:", d1, "ROUGE-2:", d2, "ROUGE-L:", dl)