In [None]:
import numpy as np
import pandas as pd
import torch
from datasets import load_metric
from sumeval.metrics.rouge import RougeCalculator
from transformers import BartTokenizerFast, BartForConditionalGeneration, BartTokenizer

In [None]:
# Load the news dataset. The evaluation loop further down reads the 'text'
# (article body) and 'headlines' (reference summary) columns from it.
news_csv_path = 'data/summarize/news_data.csv'
df = pd.read_csv(news_csv_path)

In [None]:
# Scorers: a sumeval ROUGE calculator configured for English with
# stopwords=True, and the `datasets` BLEU metric.
rouge = RougeCalculator(stopwords=True, lang="en")
# NOTE(review): `metric` is not used by the cells visible here; it is kept in
# case a later cell relies on it.
metric = load_metric('bleu')

# Model and tokenizer must come from the same checkpoint, so name it once.
MODEL_NAME = 'sshleifer/distilbart-xsum-6-6'
model = BartForConditionalGeneration.from_pretrained(MODEL_NAME)
tokenizer = BartTokenizer.from_pretrained(MODEL_NAME)

# Prefer the GPU, but fall back to CPU so the notebook still runs on machines
# without CUDA instead of raising at `model.to('cuda')`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
# Inference only: switch to eval mode. The trailing `;` keeps the full module
# repr out of the cell output.
model.eval();

In [None]:
# Evaluate on the first rows of the dataset only; the scoring loop below
# iterates over this subset.
n_eval_rows = 100
selected_df = df.iloc[:n_eval_rows]

In [None]:
# Score the summarizer on `selected_df`: generate one summary per article and
# compare it with the reference headline using ROUGE-1, ROUGE-2 and ROUGE-L.
# The "rogue" spelling of the dict name and its keys is kept as-is so that any
# other cell reading them keeps working.
MAX_INPUT_TOKENS = 100  # longer articles are truncated to this many tokens

rogue_scores = {
    'rogue_1': [],
    'rogue_2': [],
    'rogue_l': []
}

for text, headlines in zip(selected_df['text'], selected_df['headlines']):
    # One article per call, so padding is unnecessary; only truncate.
    inputs = tokenizer(
        text, max_length=MAX_INPUT_TOKENS,
        return_tensors='pt', truncation=True
    )
    # Follow whatever device the model lives on instead of assuming 'cuda'.
    inputs = inputs.to(model.device)

    # Pass the tokenizer's attention mask explicitly rather than relying on
    # `generate` to infer it from the input ids.
    output = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
    )
    # `generate` returns a batch; there is exactly one sequence in it.
    summ_text = tokenizer.decode(
        output[0].cpu(),
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )

    sample_scores = {
        'rogue_1': rouge.rouge_n(summary=summ_text, references=headlines, n=1),
        'rogue_2': rouge.rouge_n(summary=summ_text, references=headlines, n=2),
        'rogue_l': rouge.rouge_l(summary=summ_text, references=headlines),
    }
    for score_name, score_value in sample_scores.items():
        rogue_scores[score_name].append(score_value)

# Mean ROUGE-1, ROUGE-2 and ROUGE-L over the evaluated rows (shown as cell output).
tuple(np.mean(rogue_scores[score_name]) for score_name in ('rogue_1', 'rogue_2', 'rogue_l'))