In [None]:
import pandas as pd
from transformers import BartTokenizer, BartForConditionalGeneration
from rouge_score import rouge_scorer
from bert_score import score as bert_score

# Locations of the test split and of the saved model checkpoint.
test_csv_path = "D:\\Downloads\\sem 6\\nlp\\Project\\EnglishNews_test.csv"
checkpoint_dir = "D:\\Downloads\\sem 6\\nlp\\Project\\model_checkpoint"

# Test rows; the 'Article' and 'Heading' columns are read further down.
test_df = pd.read_csv(test_csv_path)

# BART weights are restored from the local checkpoint directory.
model = BartForConditionalGeneration.from_pretrained(checkpoint_dir)
# NOTE(review): the tokenizer is loaded from the 'facebook/bart-base' id, not
# from the checkpoint directory -- confirm both use the same vocabulary.
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')

# Generate one summary per test article with beam search, and collect the
# reference headline of every row in the same order.
generated_summaries = []
target_summaries = test_df['Heading'].tolist()
for article in test_df['Article']:
    # Articles are encoded one at a time, so no padding is needed; padding a
    # single sequence to 1024 tokens only makes the encoder process filler.
    # Inputs longer than 1024 tokens are still truncated.
    inputs = tokenizer(article, truncation=True, max_length=1024, return_tensors="pt")

    # The attention mask is passed explicitly instead of relying on
    # generate() to infer it from the input ids.
    generated_ids = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_length=150,
        num_beams=4,
        early_stopping=True,
    )

    # Keep only the first returned sequence and drop special tokens.
    generated_summaries.append(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

# Score the generated summaries against the reference headlines.
#
# The scorer instance gets its own name: assigning it to `rouge_scorer` would
# overwrite the imported module, so running this code a second time would
# fail on `rouge_scorer.RougeScorer`.
rouge_l_scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

# RougeScorer.score expects (target, prediction) in that order. The F-measure
# is symmetric, but precision/recall would be swapped with reversed arguments.
rouge_scores = [
    rouge_l_scorer.score(target_summary, gen_summary)['rougeL'].fmeasure
    for gen_summary, target_summary in zip(generated_summaries, target_summaries)
]
avg_rouge_l_f1 = sum(rouge_scores) / len(rouge_scores)

# bert_score is called with candidates first, references second; element 2 of
# its result is averaged as the F1 value.
bert_scores = bert_score(generated_summaries, target_summaries, lang='en', model_type='bert-base-uncased')
avg_bert_score = bert_scores[2].mean().item()

# NOTE(review): "(n=2)" in the label below does not correspond to any ROUGE-L
# setting used here; the text is left unchanged so logged output stays
# comparable with earlier runs.
print("Test ROUGE-L (n=2) F1:", avg_rouge_l_f1)
print("Test BERTScore F1:", avg_bert_score)

# Pair every test article with the summary produced for it and write the
# result to disk without the index column.
summary_columns = {
    'Article': test_df['Article'],
    'Generated_Summary': generated_summaries,
}
test_generated_df = pd.DataFrame(summary_columns)

output_csv_path = "D:\\Downloads\\sem 6\\nlp\\Project\\test_generated_summary.csv"
test_generated_df.to_csv(output_csv_path, index=False)