In [1]:
import torch 
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from transformers import PegasusForConditionalGeneration, PegasusTokenizer, T5Tokenizer, T5ForConditionalGeneration, AutoModelForSequenceClassification, AutoTokenizer
import math

from datasets import load_dataset
dataset = load_dataset("gigaword")

src_text = dataset['validation']['document'][0:100]
target_text = dataset['validation']['summary'][0:100]

torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'


Using custom data configuration default
Reusing dataset gigaword (/home/jovyan/.cache/huggingface/datasets/gigaword/default/1.2.0/c518c578e42a6afe842b09e979ee2907ea42a12b57ba992fae9e9d7347825245)


In [2]:
from datasets import load_metric
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

metric_peg = load_metric("rouge")
model_name = 'google/pegasus-gigaword'
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'

tokenizer = PegasusTokenizer.from_pretrained(model_name)

model = PegasusForConditionalGeneration.from_pretrained(model_name,return_dict=True,output_attentions=True,output_hidden_states=True).to(torch_device)


val_data = tokenizer.prepare_seq2seq_batch(src_text, target_text, return_tensors="pt", truncation="only_first", padding="longest", max_length=64)

input_ids_val = val_data['input_ids']
attention_masks_val = val_data['attention_mask']
labels_val = val_data['labels']

dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)

batch_size = 1
dataloader_val = DataLoader(dataset_val, batch_size=batch_size)


with torch.no_grad():
    gens = []
    for b in dataloader_val:        
        b = tuple(x.to(torch_device) for x in b)
        
        inputs = {'input_ids':      b[0],
                  'attention_mask': b[1],
                  'labels':         b[2],
                 }       

        gen = tokenizer.batch_decode(model.generate(inputs['input_ids']), skip_special_tokens=True)
        ref = tokenizer.batch_decode(inputs['labels'])
        metric_peg.add_batch(predictions=gen, references=ref)
        gens.append(gen)
    gens_peg = gens 
print("Pegasus Rogue Score:", metric_peg.compute())


Pegasus Rogue Score: {'rouge1': AggregateScore(low=Score(precision=0.21493877454918708, recall=0.5825996933621931, fmeasure=0.31143866110868373), mid=Score(precision=0.23244683535375887, recall=0.627738455988456, fmeasure=0.33534018257881615), high=Score(precision=0.2502667223316767, recall=0.674471753246753, fmeasure=0.3610921382795574)), 'rouge2': AggregateScore(low=Score(precision=0.10614454744096592, recall=0.3129759289321788, fmeasure=0.15728671322914134), mid=Score(precision=0.12480797591697271, recall=0.36628445165945156, fmeasure=0.18409493307709948), high=Score(precision=0.14625668918608126, recall=0.4224850559163061, fmeasure=0.21441405507774985)), 'rougeL': AggregateScore(low=Score(precision=0.19582843537452194, recall=0.5380752164502163, fmeasure=0.2842058966015877), mid=Score(precision=0.2149085491546559, recall=0.5822586580086581, fmeasure=0.3103586146706375), high=Score(precision=0.23323155909891508, recall=0.6289301857864357, fmeasure=0.33478936257970077)), 'rougeLsum':

In [3]:
from datasets import load_metric
from transformers import T5ForConditionalGeneration, T5Tokenizer

metric_t5 = load_metric("rouge")

# initialize the model architecture and weights
model = T5ForConditionalGeneration.from_pretrained("t5-base",return_dict=True,output_attentions=True,output_hidden_states=True).to(torch_device)
# initialize the model tokenizer
tokenizer = T5Tokenizer.from_pretrained("t5-base")

#tokenizer = T5Tokenizer.from_pretrained(model_name)

#model = T5ForConditionalGeneration.from_pretrained(model_name,return_dict=True,output_attentions=True,output_hidden_states=True).to(torch_device)


val_data = tokenizer.prepare_seq2seq_batch(src_text, target_text, return_tensors="pt", truncation="only_first", padding="longest", max_length=64)

input_ids_val = val_data['input_ids']
attention_masks_val = val_data['attention_mask']
labels_val = val_data['labels']

dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)

batch_size = 1
dataloader_val = DataLoader(dataset_val, batch_size=batch_size)


with torch.no_grad():
    gens = []
    for b in dataloader_val:        
        b = tuple(x.to(torch_device) for x in b)
        
        inputs = {'input_ids':      b[0],
                  'attention_mask': b[1],
                  'labels':         b[2],
                 }       
        #print(inputs['input_ids'])
        gen = tokenizer.batch_decode(model.generate(inputs['input_ids']), skip_special_tokens=True)
        ref = tokenizer.batch_decode(inputs['labels'])
        metric_t5.add_batch(predictions=gen, references=ref)
        gens.append(gen)
        
    gens_t5 = gens 
print("T5 Rogue Score:", metric_t5.compute())






Some weights of the model checkpoint at t5-base were not used when initializing T5ForConditionalGeneration: ['decoder.block.0.layer.1.EncDecAttention.relative_attention_bias.weight']
- This IS expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing T5ForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


T5 Rogue Score: {'rouge1': AggregateScore(low=Score(precision=0.22889302572427564, recall=0.144005696823984, fmeasure=0.17512955932359758), mid=Score(precision=0.2603855727605726, recall=0.1640475576475542, fmeasure=0.1991862884844533), high=Score(precision=0.2923930767149516, recall=0.18343457111867045, fmeasure=0.2230310934928345)), 'rouge2': AggregateScore(low=Score(precision=0.07228363368657485, recall=0.0431990390972976, fmeasure=0.053441654854239684), mid=Score(precision=0.09532708631238039, recall=0.05747832901103024, fmeasure=0.07092422610755572), high=Score(precision=0.1166498111773847, recall=0.07015926391719733, fmeasure=0.0864654327644854)), 'rougeL': AggregateScore(low=Score(precision=0.20437245358807846, recall=0.12745291009659818, fmeasure=0.15541778238454226), mid=Score(precision=0.2333235653235653, recall=0.14621570379897536, fmeasure=0.17809706848893683), high=Score(precision=0.2667080051892551, recall=0.1658434670733107, fmeasure=0.20214716119116585)), 'rougeLsum': A

In [None]:
nli_model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli').to(torch_device)
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')

In [None]:
import pandas as pd

In [4]:
nli_model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli').to(torch_device)
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')
giga_dataset = load_dataset("gigaword", split='validation[:100]')

global map_i
map_i= 0


def mapper(examples):
    global map_i
    txt = tokenizer([(examples['document'], gens_peg[map_i-1][0])], return_tensors='pt', truncation=True, padding='max_length')
    map_i += 1
    return txt

gigaset = giga_dataset.map(mapper)

gigaset.set_format(type='torch', columns=['input_ids', 'attention_mask'])


Some weights of the model checkpoint at facebook/bart-large-mnli were not used when initializing BartForSequenceClassification: ['model.encoder.version', 'model.decoder.version']
- This IS expected if you are initializing BartForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Using custom data configuration default
Reusing dataset gigaword (/home/jovyan/.cache/huggingface/datasets/gigaword/default/1.2.0/c518c578e42a6afe842b09e979ee2907ea42a12b57ba992fae9e9d7347825245)
Loading cached processed dataset at /home/jovyan/.cache/huggingface/datasets/gigaword/default/1.2.0/c518c578e42a6afe842b09e979ee2907

In [5]:


giga_loader = DataLoader(gigaset, batch_size=4)

print(next(iter(giga_loader)))

with torch.no_grad():
    pred_sum = 0 
    all_preds = []
    for batch in giga_loader:
        batch = {k: v[0].to(torch_device) for k, v in batch.items()}
        outputs = nli_model(**batch)
        preds = torch.argmax(F.softmax(outputs[0][:,[0,2]], dim=1), dim=1)
        all_preds.append(preds)
        pred_sum += torch.sum(preds)
print("acc: ", pred_sum.item()/100)



{'attention_mask': [tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])], 'input_ids': [tensor([[    0,  9579,    12,  ...,     1,     1,     1],
        [    0,   687,   265,  ...,     1,     1,     1],
        [    0, 15841, 19979,  ...,     1,     1,     1],
        [    0, 30453,   337,  ...,     1,     1,     1]])]}
acc:  0.59


In [6]:
nli_model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli').to(torch_device)
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')
giga_dataset = load_dataset("gigaword", split='validation[:100]')

global map_i
map_i= 0


def mapper(examples):
    global map_i
    txt = tokenizer([(examples['document'], gens_t5[map_i-1][0])], return_tensors='pt', truncation=True, padding='max_length')
    map_i += 1
    return txt

gigaset = giga_dataset.map(mapper)

gigaset.set_format(type='torch', columns=['input_ids', 'attention_mask'])

Some weights of the model checkpoint at facebook/bart-large-mnli were not used when initializing BartForSequenceClassification: ['model.encoder.version', 'model.decoder.version']
- This IS expected if you are initializing BartForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Using custom data configuration default
Reusing dataset gigaword (/home/jovyan/.cache/huggingface/datasets/gigaword/default/1.2.0/c518c578e42a6afe842b09e979ee2907ea42a12b57ba992fae9e9d7347825245)
Loading cached processed dataset at /home/jovyan/.cache/huggingface/datasets/gigaword/default/1.2.0/c518c578e42a6afe842b09e979ee2907

In [7]:
giga_loader = DataLoader(gigaset, batch_size=4)

print(next(iter(giga_loader)))

with torch.no_grad():
    pred_sum = 0 
    all_preds = []
    for batch in giga_loader:
        batch = {k: v[0].to(torch_device) for k, v in batch.items()}
        outputs = nli_model(**batch)
        preds = torch.argmax(F.softmax(outputs[0][:,[0,2]], dim=1), dim=1)
        all_preds.append(preds)
        pred_sum += torch.sum(preds)
print("acc: ", pred_sum.item()/100)

{'attention_mask': [tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])], 'input_ids': [tensor([[    0,  9579,    12,  ...,     1,     1,     1],
        [    0,   687,   265,  ...,     1,     1,     1],
        [    0, 15841, 19979,  ...,     1,     1,     1],
        [    0, 30453,   337,  ...,     1,     1,     1]])]}
acc:  0.96


In [None]:
for i in range(0,5):
    print("source:" + src_text[i])
    print("t5: " , gens_t5[i])
    print("peg: " , gens_peg[i])
    print("---")

In [None]:
print(pred_sum)

T5 Rogue Score: {'rouge1': AggregateScore(low=Score(precision=0.2966691977655987, recall=0.2036274477789973, fmeasure=0.23716962717796183), mid=Score(precision=0.32774092692223494, recall=0.22337519123855604, fmeasure=0.2600901675971215), high=Score(precision=0.3594898691231797, recall=0.24309025568430678, fmeasure=0.28338490713237025)), 'rouge2': AggregateScore(low=Score(precision=0.062419979448655925, recall=0.04363452547087416, fmeasure=0.05075159968829902), mid=Score(precision=0.0840016980568451, recall=0.057916705091434165, fmeasure=0.06777929030558708), high=Score(precision=0.10377296720844512, recall=0.07106273660872148, fmeasure=0.08307433778614123)), 'rougeL': AggregateScore(low=Score(precision=0.202436916237787, recall=0.1359903322909964, fmeasure=0.15874322272641056), mid=Score(precision=0.22759298415722562, recall=0.152414494474319, fmeasure=0.17752891242380794), high=Score(precision=0.2589370350001735, recall=0.17073807643800232, fmeasure=0.19982035293926703)), 'rougeLsum': AggregateScore(low=Score(precision=0.19860973256726933, recall=0.13568206336082134, fmeasure=0.1583982080532767), mid=Score(precision=0.22779802642747682, recall=0.15268691045945715, fmeasure=0.17799272088433366), high=Score(precision=0.2577967100935269, recall=0.17101019801314493, fmeasure=0.19913058321901614))}


Pegasus Rogue Score: {'rouge1': AggregateScore(low=Score(precision=0.21493877454918708, recall=0.5825996933621931, fmeasure=0.31143866110868373), mid=Score(precision=0.23244683535375887, recall=0.627738455988456, fmeasure=0.33534018257881615), high=Score(precision=0.2502667223316767, recall=0.674471753246753, fmeasure=0.3610921382795574)), 'rouge2': AggregateScore(low=Score(precision=0.10614454744096592, recall=0.3129759289321788, fmeasure=0.15728671322914134), mid=Score(precision=0.12480797591697271, recall=0.36628445165945156, fmeasure=0.18409493307709948), high=Score(precision=0.14625668918608126, recall=0.4224850559163061, fmeasure=0.21441405507774985)), 'rougeL': AggregateScore(low=Score(precision=0.19582843537452194, recall=0.5380752164502163, fmeasure=0.2842058966015877), mid=Score(precision=0.2149085491546559, recall=0.5822586580086581, fmeasure=0.3103586146706375), high=Score(precision=0.23323155909891508, recall=0.6289301857864357, fmeasure=0.33478936257970077)), 'rougeLsum': AggregateScore(low=Score(precision=0.197700128893641, recall=0.5380785984848485, fmeasure=0.2862235542660331), mid=Score(precision=0.21450136539512288, recall=0.5822554112554111, fmeasure=0.3102634700806859), high=Score(precision=0.23290415319010727, recall=0.6268768127705627, fmeasure=0.3351327039250084))}