### Imports

In [1]:
from IPython.display import display_html, clear_output
from itertools import chain,cycle
from copy import deepcopy
import urllib.request
import transformers
import numpy as np
import json
import time
import os
import torch
import random 
import pandas as pd
from tqdm import tqdm

from sklearn.model_selection import GroupShuffleSplit
from datasets import *
from transformers import AutoTokenizer, PreTrainedTokenizerFast, EncoderDecoderModel, Seq2SeqTrainingArguments, Seq2SeqTrainer, AdamW, DataCollatorForSeq2Seq
#from allennlp_models.rc.tools import squad

import plotly.express as px

# Display dataframes
def display(*args,titles=cycle([''])):
    html_str=''
    for df,title in zip(args, chain(titles,cycle(['</br>'])) ):
        html_str+='<th style="text-align:left"><td style="vertical-align:top">'
        html_str+=f'<h4 style="text-align: left;">{title}</h2>'
        html_str+=df.to_html().replace('table','table style="display:inline"')
        html_str+='</td></th>'
    display_html(html_str,raw=True)
    
# Setting seeds for reproducibility
def set_reproducibility(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    transformers.set_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    os.environ['TF_DETERMINISTIC_OPS'] = '1'



In [2]:
if torch.cuda.is_available():
    device = torch.device("cuda")  # use the GPU
else:
    device = torch.device("cpu")  # use the CPU

print("Using device:", device)

Using device: cuda


In [3]:
"""
Functions taken from [the official evaluation script]
(https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/)
for SQuAD version 2.0.
"""
import collections
import re
import string
from typing import Callable, Sequence, TypeVar, Tuple


def make_qid_to_has_ans(dataset):
    qid_to_has_ans = {}
    for article in dataset:
        for p in article["paragraphs"]:
            for qa in p["qas"]:
                qid_to_has_ans[qa["id"]] = bool(qa["answers"])
    return qid_to_has_ans


def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""

    def remove_articles(text):
        regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
        return re.sub(regex, " ", text)

    def white_space_fix(text):
        return " ".join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))


def get_tokens(s):
    if not s:
        return []
    return normalize_answer(s).split()


def compute_exact(a_pred: str, a_gold: str) -> int:
    return int(normalize_answer(a_pred) == normalize_answer(a_gold))


def compute_f1(a_pred: str, a_gold: str) -> float:
    pred_toks = get_tokens(a_pred)
    gold_toks = get_tokens(a_gold)
    common = collections.Counter(pred_toks) & collections.Counter(gold_toks)  # type: ignore[var-annotated]
    num_same = sum(common.values())
    if len(pred_toks) == 0 or len(gold_toks) == 0:
        # If either is no-answer, then F1 is 1 if they agree, 0 otherwise
        return float(pred_toks == gold_toks)
    if num_same == 0:
        return 0.0
    precision = 1.0 * num_same / len(pred_toks)
    recall = 1.0 * num_same / len(gold_toks)
    f1 = (2 * precision * recall) / (precision + recall)
    return f1


_P = TypeVar("_P")
_G = TypeVar("_G")
_T = TypeVar("_T", int, float, Tuple[int, ...], Tuple[float, ...])


def metric_max_over_ground_truths(
    metric_fn: Callable[[_P, _G], _T], prediction: _P, ground_truths: Sequence[_G]
) -> _T:
    scores_for_ground_truths = []
    for ground_truth in ground_truths:
        score = metric_fn(prediction, ground_truth)
        scores_for_ground_truths.append(score)
    return max(scores_for_ground_truths)


def get_metric_score(prediction: str, gold_answers: Sequence[str]) -> Tuple[int, float]:
    exact_scores = metric_max_over_ground_truths(compute_exact, prediction, gold_answers)
    f1_scores = metric_max_over_ground_truths(compute_f1, prediction, gold_answers)
    return exact_scores, f1_scores

In [None]:
def check_tokens(tokenizer):
    # Get the special tokens and their corresponding IDs
    special_tokens = tokenizer.special_tokens_map
    special_ids = tokenizer.convert_tokens_to_ids(list(special_tokens.values()))
    print("Special tokens:")
    for token_type, token_list in special_tokens.items():
        print(f"{token_type}: {token_list}")
    # Print the special tokens and their corresponding IDs
    for token, id in zip(special_tokens.keys(), special_ids):
        print(f"{token}: {id}")

### Dataset Download

In [4]:
class DownloadProgressBar(tqdm):
    def update_to(self, b=1, bsize=1, tsize=None):
        if tsize is not None:
            self.total = tsize
        self.update(b * bsize - self.n)
        
def download_url(url, output_path):
    with DownloadProgressBar(unit='B', unit_scale=True,
                             miniters=1, desc=url.split('/')[-1]) as t:
        urllib.request.urlretrieve(url, filename=output_path, reporthook=t.update_to)

def download_data(data_path, url_path, suffix):    
    if not os.path.exists(data_path):
        os.makedirs(data_path)
        
    data_path = os.path.join(data_path, f'{suffix}.json')

    if not os.path.exists(data_path):
        print(f"Downloading CoQA {suffix} data split... (it may take a while)")
        download_url(url=url_path, output_path=data_path)
        urllib.request.urlretrieve(url_path, filename=data_path)
        print("Download completed!")

In [5]:
# Train data
train_url = "https://nlp.stanford.edu/data/coqa/coqa-train-v1.0.json"
download_data(data_path='coqa', url_path=train_url, suffix='train')

# Test data
test_url = "https://nlp.stanford.edu/data/coqa/coqa-dev-v1.0.json"
download_data(data_path='coqa', url_path=test_url, suffix='test') 

### Preprocessing

In [6]:
# Creating Dataframes and removing unanswerable questions
train_data = json.load((open('coqa/train.json')))
test_data = json.load((open('coqa/test.json')))

qas = pd.json_normalize(train_data['data'], ['questions'], ['source', 'id', 'story'])
ans = pd.json_normalize(train_data['data'], ['answers'],['id'])
train_val_df = pd.merge(qas,ans, left_on=['id','turn_id'], right_on=['id','turn_id'])
train_val_df = train_val_df.loc[train_val_df['input_text_y']!='unknown']

qas = pd.json_normalize(test_data['data'], ['questions'], ['source', 'id', 'story'])
ans = pd.json_normalize(test_data['data'], ['answers'],['id'])
test_df = pd.merge(qas,ans, left_on=['id','turn_id'], right_on=['id','turn_id'])
test_df = test_df.loc[test_df['input_text_y']!='unknown']

In [7]:
# Removing bad turns
train_val_df = train_val_df.loc[(train_val_df['bad_turn_x'] != 'True') & (train_val_df['bad_turn_y'] != 'True')]

# Removing equal text/answer entries
train_val_df = train_val_df[train_val_df.story != train_val_df.input_text_y]
test_df = test_df[test_df.story != test_df.input_text_y]

# Removing enties with empty answers
train_val_df = train_val_df[train_val_df['input_text_y'].str.len()>0]
test_df = test_df[test_df['input_text_y'].str.len()>0]

In [8]:
# Text preprocess
def preprocess(ds,columns):
    ds = ds.replace(r'\n',' ', regex=True)
#     ds = ds.replace(r'[^\w\s]+', ' ', regex=True)
#     for feature in columns:
#         ds[feature] = ds[feature].str.lower().str.strip()
        
    return ds

columns = ['story', 'input_text_x', 'span_text', 'input_text_y']

train_val_df = preprocess(train_val_df,columns)
test_df = preprocess(test_df,columns)

In [9]:
# Train/Validation Split
set_reproducibility(42)

train_inds, val_inds = next(GroupShuffleSplit(test_size=.20, n_splits=2, random_state = 42).split(train_val_df, groups=train_val_df['id']))

train_df = train_val_df.iloc[train_inds]
val_df = train_val_df.iloc[val_inds].reset_index()

print(train_df.columns)

Index(['input_text_x', 'turn_id', 'bad_turn_x', 'source', 'id', 'story',
       'span_start', 'span_end', 'span_text', 'input_text_y', 'bad_turn_y'],
      dtype='object')


In [10]:
# Checking the Dataframes
print(f'Training set [{train_df.shape}]')
print(f'\tFeatures: {list(train_df.columns)}')
display(train_df.loc[11:15,['id', 'input_text_x', 'input_text_y', 'span_text']])

print(f'Validation set [{val_df.shape}]')
print(f'\tFeatures: {list(val_df.columns)}')
display(val_df.loc[11:15,['id', 'input_text_x', 'input_text_y', 'span_text']])

print(f'Test set [{test_df.shape}]')
print(f'\tFeatures: {list(test_df.columns)}')
display(test_df.loc[11:15,['id', 'input_text_x', 'input_text_y', 'span_text']])

Training set [(85823, 11)]
	Features: ['input_text_x', 'turn_id', 'bad_turn_x', 'source', 'id', 'story', 'span_start', 'span_end', 'span_text', 'input_text_y', 'bad_turn_y']


Unnamed: 0,id,input_text_x,input_text_y,span_text
11,3zotghdk5ibi9cex97fepx7jetpso7,how many items are in this secret collection?,150000,"Vatican Secret Archives were separated from the library at the beginning of the 17th century; they contain another 150,000 items."
12,3zotghdk5ibi9cex97fepx7jetpso7,Can anyone use this library?,anyone who can document their qualifications and research needs.,The Vatican Library is open to anyone who can document their qualifications and research needs.
14,3zotghdk5ibi9cex97fepx7jetpso7,what must be requested in person or by mail?,Photocopies,Photocopies for private study of pages from books published between 1801 and 1990 can be requested in person or by mail.
15,3zotghdk5ibi9cex97fepx7jetpso7,of what books?,only books published between 1801 and 1990,hotocopies for private study of pages from books published between 1801 and 1990


Validation set [(21452, 12)]
	Features: ['index', 'input_text_x', 'turn_id', 'bad_turn_x', 'source', 'id', 'story', 'span_start', 'span_end', 'span_text', 'input_text_y', 'bad_turn_y']


Unnamed: 0,id,input_text_x,input_text_y,span_text
11,3bdcf01ogxu7zdn9vlrbf2rqzwplyf,Where was Milly led to?,Cottonwoods,led Milly Erne to Cottonwoods
12,3bdcf01ogxu7zdn9vlrbf2rqzwplyf,Who took her there?,A man,the man who had led Milly Erne to Cottonwoods
13,3bdcf01ogxu7zdn9vlrbf2rqzwplyf,Whose name would Jane not speak?,this Mormon's name,this Mormon's name
14,3bdcf01ogxu7zdn9vlrbf2rqzwplyf,Did she allow herself to even think it?,No,she did not even think it.
15,3bdcf01ogxu7zdn9vlrbf2rqzwplyf,What was Jane hoping Lassiter would become to her?,"a helper, of a friend, of a champion","the need of a helper, of a friend, of a champio"


Test set [(7917, 9)]
	Features: ['input_text_x', 'turn_id', 'source', 'id', 'story', 'span_start', 'span_end', 'span_text', 'input_text_y']


Unnamed: 0,id,input_text_x,input_text_y,span_text
11,3dr23u6we5exclen4th8uq9rb42tel,Did they want Cotton to change the color of her fur?,no,We would never want you to be any other way
12,3azhrg4cu4ktme1zh7c2ro3pn2430d,what was the name of the fish,Asta.,Asta.
13,3azhrg4cu4ktme1zh7c2ro3pn2430d,What looked like a birds belly,a bottle,a bottle
14,3azhrg4cu4ktme1zh7c2ro3pn2430d,who said that,Asta.,"""It looks like a bird's belly,"" said Asta."
15,3azhrg4cu4ktme1zh7c2ro3pn2430d,Was Sharkie a friend?,Yes,Asta's friend Sharkie


In [11]:
# Overlap Check
set_train = set(train_df['id'])
set_val = set(val_df['id'])

overlap = False
for i in set_train:
    if i in set_val:
        overlap = True
        break

print('Overlap' if overlap else 'No overlap')

No overlap


In [12]:
# Dataframes to Datasets
train_df_to_ds = train_df[columns]
val_df_to_ds = val_df[columns]
test_df_to_ds = test_df[columns]

train_df_to_ds = train_df_to_ds.rename(columns={'input_text_x': 'question', 'story': 'context',\
                                               'input_text_y': 'answer', 'span_text': 'text'})
val_df_to_ds = val_df_to_ds.rename(columns={'input_text_x': 'question', 'story': 'context',\
                                               'input_text_y': 'answer', 'span_text': 'text'})
test_df_to_ds = test_df_to_ds.rename(columns={'input_text_x': 'question', 'story': 'context',\
                                               'input_text_y': 'answer', 'span_text': 'text'})

In [13]:
# Datasets Batch split
batch_size = 8
ratio = 2

train_samples = (round(train_df_to_ds.shape[0] * ratio / 100) // batch_size) * batch_size

val_samples = (round(val_df_to_ds.shape[0] * ratio / 100) // batch_size) * batch_size
test_samples = (round(test_df_to_ds.shape[0] * ratio / 100) // batch_size) * batch_size

train_dataset = Dataset.from_dict(train_df_to_ds.iloc[:train_samples])
val_dataset = Dataset.from_dict(val_df_to_ds.iloc[:val_samples])
test_dataset = Dataset.from_dict(test_df_to_ds.iloc[:test_samples])

dataset_COQA = DatasetDict({'train':train_dataset,'validation':val_dataset,'test':test_dataset})
print(dataset_COQA)

DatasetDict({
    train: Dataset({
        features: ['context', 'question', 'text', 'answer'],
        num_rows: 1712
    })
    validation: Dataset({
        features: ['context', 'question', 'text', 'answer'],
        num_rows: 424
    })
    test: Dataset({
        features: ['context', 'question', 'text', 'answer'],
        num_rows: 152
    })
})


In [14]:
max_length_input = 512
max_length_answer = 42

In [15]:
def prepare_features(batch, tokenizer, max_length_input, max_length_answer):
    # Tokenize the Question and Context columns
    encoded_batch_inputs = tokenizer(
        batch['question'],
        batch['context'],
        max_length=max_length_input,
        truncation='only_second',
        padding='max_length',
        return_tensors='pt'        
    )

    # Tokenize the Answer column
    encoded_batch_labels = tokenizer(
        batch['answer'],
        max_length=max_length_answer,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )
    
    encoded_batch_inputs['labels'] = encoded_batch_labels.input_ids
#   encoded_batch_inputs['decoder_input_ids'] = deepcopy(encoded_batch_inputs['labels'])
#   encoded_batch_inputs['labels'] = [[-100 if token == tokenizer.pad_token_id else token\
#                                    for token in labels]\
#                                    for labels in encoded_batch_inputs['labels']]
    
    encoded_batch_inputs['labels_mask'] = encoded_batch_labels.attention_mask


    return encoded_batch_inputs

### Tokenization

In [16]:
model_checkpoint_M1 = 'distilroberta-base'
# Tokenizer
tokenizer_M1 = AutoTokenizer.from_pretrained(model_checkpoint_M1)
assert isinstance(tokenizer_M1, PreTrainedTokenizerFast)
tokenizer_M1.bos_token = tokenizer_M1.cls_token
tokenizer_M1.eos_token = tokenizer_M1.sep_token
check_tokens(tokenizer_M1)

Special tokens:
bos_token: <s>
eos_token: </s>
unk_token: <unk>
sep_token: </s>
pad_token: <pad>
cls_token: <s>
mask_token: <mask>
bos_token: 0
eos_token: 2
unk_token: 3
sep_token: 2
pad_token: 1
cls_token: 0
mask_token: 50264


In [17]:
# Tokenizing the Dataset
tokenized_datasets_M1 = DatasetDict()

# Use the `prepare_features` functions
tokenized_datasets_M1 = dataset_COQA.map(
    lambda batch: prepare_features(batch, tokenizer_M1, max_length_input, max_length_answer),
    batched=True,
    batch_size=batch_size,
    remove_columns=dataset_COQA['train'].column_names
)

print(tokenized_datasets_M1)

  0%|          | 0/214 [00:00<?, ?ba/s]

  0%|          | 0/53 [00:00<?, ?ba/s]

  0%|          | 0/19 [00:00<?, ?ba/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels', 'labels_mask'],
        num_rows: 1712
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels', 'labels_mask'],
        num_rows: 424
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels', 'labels_mask'],
        num_rows: 152
    })
})


### Model definition

In [18]:
# Load Model
model_M1 = EncoderDecoderModel.from_encoder_decoder_pretrained(model_checkpoint_M1, model_checkpoint_M1, tie_encoder_decoder=False)

# Model special tokens
model_M1.config.decoder_start_token_id = tokenizer_M1.cls_token_id
model_M1.config_eos_token_id = tokenizer_M1.sep_token_id
model_M1.config.pad_token_id = tokenizer_M1.pad_token_id
model_M1.config.vocab_size = model_M1.config.encoder.vocab_size

# Model hyperparams
model_M1.config.max_length = max_length_answer
model_M1.config.min_length = 1
model_M1.config.no_repeat_ngram_size = 1
model_M1.config.early_stopping = True
model_M1.config.repetition_penalty= 3.
model_M1.config.num_beams = 8

print(f"Parameters #: {model_M1.num_parameters()}")

model_M1.to(device)

Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForCausalLM were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['roberta.encoder.layer.2.crossattention.output.LayerNorm.weight', 'roberta.encoder.layer.5.crossattention.self.query.bias', 'roberta.encoder.layer.0.crossatten

Parameters #: 178472025


EncoderDecoderModel(
  (encoder): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-5): 6 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): L

In [19]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions
    
    labels_text = tokenizer_M1.batch_decode(labels, skip_special_tokens=True)
    preds_text = tokenizer_M1.batch_decode(preds, skip_special_tokens=True)
    
    squad_scores=[]
    for i in range(len(preds_text)):
        squad_scores.append(compute_f1(str(preds_text[i]), str(labels_text[i])))
    mean_squad_f1 = sum(squad_scores)/len(squad_scores)

    return {"squad_f1_score": mean_squad_f1}

### Training

In [20]:
epochs = 3

training_args_M1 = Seq2SeqTrainingArguments(
    output_dir='./M1_Checkpoints',
    evaluation_strategy="epoch",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    predict_with_generate=True,
    overwrite_output_dir=True,
    #save_total_limit=2,
    fp16=True, 
    num_train_epochs = epochs,
    weight_decay=0.01,
    logging_steps=10
    #resume_from_checkpoint = True
)

# Optimizer and scheduler
optimizer_M1 = AdamW(model_M1.parameters(),lr= 5e-5)
train_steps  = epochs*len(tokenized_datasets_M1['train'])/batch_size
scheduler_M1 = transformers.get_cosine_schedule_with_warmup(optimizer=optimizer_M1,num_warmup_steps=50,num_training_steps=train_steps)
optimizers_M1 = optimizer_M1, scheduler_M1

trainer_M1 = Seq2SeqTrainer(
    model=model_M1,
    tokenizer=tokenizer_M1,
    args=training_args_M1,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_datasets_M1['train'],
    eval_dataset=tokenized_datasets_M1['validation'],
    optimizers=optimizers_M1,
    data_collator=DataCollatorForSeq2Seq(tokenizer_M1,model=model_M1)
)



Using cuda_amp half precision backend


In [21]:
os.environ["WANDB_DISABLED"] = "true"

trainer_M1.train()

The following columns in the training set don't have a corresponding argument in `EncoderDecoderModel.forward` and have been ignored: labels_mask. If labels_mask are not expected by `EncoderDecoderModel.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 1712
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 642
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




Epoch,Training Loss,Validation Loss,Squad F1 Score
1,0.8835,0.73093,0.018661
2,0.6932,0.687643,0.103449
3,0.7551,0.683339,0.049163


The following columns in the evaluation set don't have a corresponding argument in `EncoderDecoderModel.forward` and have been ignored: labels_mask. If labels_mask are not expected by `EncoderDecoderModel.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 424
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `EncoderDecoderModel.forward` and have been ignored: labels_mask. If labels_mask are not expected by `EncoderDecoderModel.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 424
  Batch size = 8
Saving model checkpoint to ./M1_Checkpoints\checkpoint-500
Configuration saved in ./M1_Checkpoints\checkpoint-500\config.json
Model weights saved in ./M1_Checkpoints\checkpoint-500\pytorch_model.bin
tokenizer config file saved in ./M1_Checkpoints\checkpoint-500\tokenizer_config.json
Special tokens file saved in ./M1_Checkpoints\checkpoint-500\special_token

TrainOutput(global_step=642, training_loss=1.2746291621080441, metrics={'train_runtime': 175.956, 'train_samples_per_second': 29.189, 'train_steps_per_second': 3.649, 'total_flos': 1585257005629440.0, 'train_loss': 1.2746291621080441, 'epoch': 3.0})

In [22]:
# Initialize the data collator
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer_M1, model=model_M1)

# Create a DataLoader for the dataset using the data collator
test_loader = torch.utils.data.DataLoader(tokenized_datasets_M1['test'], 
                                          batch_size=batch_size, 
                                          collate_fn=data_collator)
torch.cuda.empty_cache()
# Generate answersù
i=0
for batch in tqdm(test_loader):
    
    example = batch['input_ids'].to(device)
    att_mask = batch['attention_mask'].to(device)
    generated_ids = model_M1.generate(input_ids=example, 
                                      attention_mask=att_mask,
                                      max_length=max_length_answer
                                     )
    ex = tokenizer_M1.batch_decode(example, skip_special_tokens=True)
    print(ex[0])
    print(dataset_COQA['test']['question'][i:i+batch_size])
    
    generated_answers = tokenizer_M1.batch_decode(generated_ids, skip_special_tokens=True)
    print(f'Generated ans: {generated_answers}')
    true = batch["labels"]
    ground_truth = tokenizer_M1.batch_decode(true, skip_special_tokens=True)
    print(f'True ans: {ground_truth}')
    i+=batch_size
    
    



  5%|████▎                                                                              | 1/19 [00:00<00:06,  2.57it/s]

What color was Cotton?Once upon a time, in a barn near a farm house, there lived a little white kitten named Cotton. Cotton lived high up in a nice warm place above the barn where all of the farmer's horses slept. But Cotton wasn't alone in her little home above the barn, oh no. She shared her hay bed with her mommy and 5 other sisters. All of her sisters were cute and fluffy, like Cotton. But she was the only white one in the bunch. The rest of her sisters were all orange with beautiful white tiger stripes like Cotton's mommy. Being different made Cotton quite sad. She often wished she looked like the rest of her family. So one day, when Cotton found a can of the old farmer's orange paint, she used it to paint herself like them. When her mommy and sisters found her they started laughing.   "What are you doing, Cotton?!"   "I only wanted to be more like you".   Cotton's mommy rubbed her face on Cotton's and said "Oh Cotton, but your fur is so pretty and special, like you. We would neve

 11%|████████▋                                                                          | 2/19 [00:00<00:06,  2.66it/s]

What did Cotton's mother and siblings do when they saw her painted orange?Once upon a time, in a barn near a farm house, there lived a little white kitten named Cotton. Cotton lived high up in a nice warm place above the barn where all of the farmer's horses slept. But Cotton wasn't alone in her little home above the barn, oh no. She shared her hay bed with her mommy and 5 other sisters. All of her sisters were cute and fluffy, like Cotton. But she was the only white one in the bunch. The rest of her sisters were all orange with beautiful white tiger stripes like Cotton's mommy. Being different made Cotton quite sad. She often wished she looked like the rest of her family. So one day, when Cotton found a can of the old farmer's orange paint, she used it to paint herself like them. When her mommy and sisters found her they started laughing.   "What are you doing, Cotton?!"   "I only wanted to be more like you".   Cotton's mommy rubbed her face on Cotton's and said "Oh Cotton, but your f

 16%|█████████████                                                                      | 3/19 [00:01<00:05,  2.90it/s]

did they get the bottle?Once there was a beautiful fish named Asta. Asta lived in the ocean. There were lots of other fish in the ocean where Asta lived. They played all day long.   One day, a bottle floated by over the heads of Asta and his friends. They looked up and saw the bottle. "What is it?" said Asta's friend Sharkie. "It looks like a bird's belly," said Asta. But when they swam closer, it was not a bird's belly. It was hard and clear, and there was something inside it.   The bottle floated above them. They wanted to open it. They wanted to see what was inside. So they caught the bottle and carried it down to the bottom of the ocean. They cracked it open on a rock. When they got it open, they found what was inside. It was a note. The note was written in orange crayon on white paper. Asta could not read the note. Sharkie could not read the note. They took the note to Asta's papa. "What does it say?" they asked.   Asta's papa read the note. He told Asta and Sharkie, "This note is

 21%|█████████████████▍                                                                 | 4/19 [00:01<00:04,  3.01it/s]

Do I know her?My doorbell rings. On the step, I find the elderly Chinese lady, small and slight, holding the hand of a little boy. In her other hand, she holds a paper carrier bag.   I know this lady. It is not her first visit. She is the boy's grandmother, and her daughter bought the house next door last October.   Her daughter, Nicole, speaks fluent English. But she is now in Shanghai, and her parents are here with the little boy. Nicole has obviously told her mother that I am having heart surgery soon, so her mother has decided I need more nutrients.   I know what is inside the bag--a thermos with hot soup and a stainless-steel container with rice, vegetables and either chicken, meat or shrimp, sometimes with a kind of pancake. This has become an almost-daily practice.   Communication between us is somewhat affected by the fact that she doesn't speak English and all I can say in Chinese is hello. Once, she brought an iPad as well as the food. She pointed to the screen, which display

 26%|█████████████████████▊                                                             | 5/19 [00:01<00:04,  3.02it/s]

What kind of dishes does she bring?My doorbell rings. On the step, I find the elderly Chinese lady, small and slight, holding the hand of a little boy. In her other hand, she holds a paper carrier bag.   I know this lady. It is not her first visit. She is the boy's grandmother, and her daughter bought the house next door last October.   Her daughter, Nicole, speaks fluent English. But she is now in Shanghai, and her parents are here with the little boy. Nicole has obviously told her mother that I am having heart surgery soon, so her mother has decided I need more nutrients.   I know what is inside the bag--a thermos with hot soup and a stainless-steel container with rice, vegetables and either chicken, meat or shrimp, sometimes with a kind of pancake. This has become an almost-daily practice.   Communication between us is somewhat affected by the fact that she doesn't speak English and all I can say in Chinese is hello. Once, she brought an iPad as well as the food. She pointed to the 

 32%|██████████████████████████▏                                                        | 6/19 [00:01<00:03,  3.42it/s]

Was he in movies?(CNN) -- Dennis Farina, the dapper, mustachioed cop-turned-actor best known for his tough-as-nails work in such TV series as "Law & Order," "Crime Story," and "Miami Vice," has died. He was 69.   "We are deeply saddened by the loss of a great actor and a wonderful man," said his publicist, Lori De Waal, in a statement Monday. "Dennis Farina was always warmhearted and professional, with a great sense of humor and passion for his profession. He will be greatly missed by his family, friends and colleagues."   Farina, who had a long career as a police officer in Chicago, got into acting through director Michael Mann, who used him as a consultant and cast him in his 1981 movie, "Thief." That role led to others in such Mann-created shows as "Miami Vice" (in which Farina played a mobster) and "Crime Story" (in which he starred as Lt. Mike Torello).   Farina also had roles, generally as either cops or gangsters, in a number of movies, including "Midnight Run" (1988), "Get Shor

 37%|██████████████████████████████▌                                                    | 7/19 [00:02<00:03,  3.65it/s]

Who did he portray?(CNN) -- Dennis Farina, the dapper, mustachioed cop-turned-actor best known for his tough-as-nails work in such TV series as "Law & Order," "Crime Story," and "Miami Vice," has died. He was 69.   "We are deeply saddened by the loss of a great actor and a wonderful man," said his publicist, Lori De Waal, in a statement Monday. "Dennis Farina was always warmhearted and professional, with a great sense of humor and passion for his profession. He will be greatly missed by his family, friends and colleagues."   Farina, who had a long career as a police officer in Chicago, got into acting through director Michael Mann, who used him as a consultant and cast him in his 1981 movie, "Thief." That role led to others in such Mann-created shows as "Miami Vice" (in which Farina played a mobster) and "Crime Story" (in which he starred as Lt. Mike Torello).   Farina also had roles, generally as either cops or gangsters, in a number of movies, including "Midnight Run" (1988), "Get Sh

 42%|██████████████████████████████████▉                                                | 8/19 [00:02<00:03,  3.23it/s]

Where do Quinton and Kendra travel to and from every day?Kendra and Quinton travel to and from school every day. Kendra lives further from the bus stop than Quinton does, stops every morning at Quinton's house to join him to walk to the bus stop. Every afternoon, after school, when walking home from the bus stop they go in for cookies and milk that Quinton's mother has ready and waiting for them. Quinton can't eat cheese or cake so they had the same snack every day. They both work together on their homework and when they are done they play together. Kendra always makes sure to leave in time to get home for dinner. She doesn't want to miss story time which was right before bedtime.   One morning Kendra walked up to Quinton's house, she thought something might be wrong because normally Quinton was waiting outside for her and on this morning he was not to be found. Kendra went up to the door and knocked. She waited and waited and yet no one answered. She saw that Quinton's mother's car wa

 47%|███████████████████████████████████████▎                                           | 9/19 [00:02<00:03,  2.99it/s]

Did she see the car?Kendra and Quinton travel to and from school every day. Kendra lives further from the bus stop than Quinton does, stops every morning at Quinton's house to join him to walk to the bus stop. Every afternoon, after school, when walking home from the bus stop they go in for cookies and milk that Quinton's mother has ready and waiting for them. Quinton can't eat cheese or cake so they had the same snack every day. They both work together on their homework and when they are done they play together. Kendra always makes sure to leave in time to get home for dinner. She doesn't want to miss story time which was right before bedtime.   One morning Kendra walked up to Quinton's house, she thought something might be wrong because normally Quinton was waiting outside for her and on this morning he was not to be found. Kendra went up to the door and knocked. She waited and waited and yet no one answered. She saw that Quinton's mother's car wasn't in their driveway which was weir

 53%|███████████████████████████████████████████▏                                      | 10/19 [00:03<00:03,  2.92it/s]

Where did he go?Kendra and Quinton travel to and from school every day. Kendra lives further from the bus stop than Quinton does, stops every morning at Quinton's house to join him to walk to the bus stop. Every afternoon, after school, when walking home from the bus stop they go in for cookies and milk that Quinton's mother has ready and waiting for them. Quinton can't eat cheese or cake so they had the same snack every day. They both work together on their homework and when they are done they play together. Kendra always makes sure to leave in time to get home for dinner. She doesn't want to miss story time which was right before bedtime.   One morning Kendra walked up to Quinton's house, she thought something might be wrong because normally Quinton was waiting outside for her and on this morning he was not to be found. Kendra went up to the door and knocked. She waited and waited and yet no one answered. She saw that Quinton's mother's car wasn't in their driveway which was weird. S

 58%|███████████████████████████████████████████████▍                                  | 11/19 [00:03<00:02,  3.16it/s]

What is its population?Staten Island is one of the five boroughs of New York City in the U.S. state of New York. In the southwest of the city, Staten Island is the southernmost part of both the city and state of New York, with Conference House Park at the southern tip of the island and the state. The borough is separated from New Jersey by the Arthur Kill and the Kill Van Kull, and from the rest of New York by New York Bay. With a 2016 Census-estimated population of 476,015, Staten Island is the least populated of the boroughs but is the third-largest in area at. Staten Island is the only borough of New York with a non-Hispanic White majority. The borough is coextensive with Richmond County, and until 1975 was the Borough of Richmond. Its flag was later changed to reflect this. Staten Island has been sometimes called "the forgotten borough" by inhabitants who feel neglected by the city government.   The North Shore—especially the neighborhoods of St. George, Tompkinsville, Clifton, and

 63%|███████████████████████████████████████████████████▊                              | 12/19 [00:03<00:02,  3.33it/s]

What was the first thing he checked?Thunder was coming when Reginald Eppes woke up at five in the morning. He checked the weather forecast. A violent storm was coming,but it sounded like his small town wouldn't be hit too hard. But Eppes, a firefighter, had clearly known the power of these huge storms from experiences. "Do you know where the flashlights are?" he asked his wife. Danielle. Just then, thunder was all-around them. The moment he turned the flashlight on. The house lights went off. A second later, the kitchen windows were broken. Eppes and Danielle ran to their boys who were still sleeping in their bedroom.   "Get up, get up, R.J.! " Eppes shouted, waving his flashlight. The sleepy boy moved to the edge of the bed. Eppes held out his arms and ordered his son to jump. He was too late. The roof was torn down. R.J. was buried,under the pieces.   "I've lost him," Eppes thought. Quickly, he hurried to Joel to shield him. Glass, wood, and plaster ( ) hit them. Then something huge,

 68%|████████████████████████████████████████████████████████                          | 13/19 [00:04<00:01,  3.31it/s]

Was RJ badly hurt?Thunder was coming when Reginald Eppes woke up at five in the morning. He checked the weather forecast. A violent storm was coming,but it sounded like his small town wouldn't be hit too hard. But Eppes, a firefighter, had clearly known the power of these huge storms from experiences. "Do you know where the flashlights are?" he asked his wife. Danielle. Just then, thunder was all-around them. The moment he turned the flashlight on. The house lights went off. A second later, the kitchen windows were broken. Eppes and Danielle ran to their boys who were still sleeping in their bedroom.   "Get up, get up, R.J.! " Eppes shouted, waving his flashlight. The sleepy boy moved to the edge of the bed. Eppes held out his arms and ordered his son to jump. He was too late. The roof was torn down. R.J. was buried,under the pieces.   "I've lost him," Eppes thought. Quickly, he hurried to Joel to shield him. Glass, wood, and plaster ( ) hit them. Then something huge, heavy-maybe the w

 74%|████████████████████████████████████████████████████████████▍                     | 14/19 [00:04<00:01,  3.60it/s]

Why?(CNN) -- FBI agents on Friday night searched the Maryland home of the suspect in the recent disappearance of an American woman in Aruba, an agent said.   The search is occurring in the Gaithersburg residence of Gary Giordano, who is currently being held in an Aruban jail, FBI Special Agent Rich Wolf told CNN.   Agents, wearing vests that said FBI and carrying empty cardboard and plastic boxes, arrived about 8:40 p.m. Friday. About 15 unmarked cars could be seen on the street, as well as a Montgomery County police vehicle.   Supervisory Special Agent Philip Celestini, who was at the residence, declined to comment further on the search, citing the active investigation.   Aruban Solicitor General Taco Stein said earlier Friday that the suspect will appear in court Monday, where an investigating magistrate could order him held for at least eight more days, order him to remain on the island or release him outright due to a lack of evidence.   Giordano was arrested by Aruban police on Au

 84%|█████████████████████████████████████████████████████████████████████             | 16/19 [00:04<00:00,  4.05it/s]

What was she doing?(CNN) -- FBI agents on Friday night searched the Maryland home of the suspect in the recent disappearance of an American woman in Aruba, an agent said.   The search is occurring in the Gaithersburg residence of Gary Giordano, who is currently being held in an Aruban jail, FBI Special Agent Rich Wolf told CNN.   Agents, wearing vests that said FBI and carrying empty cardboard and plastic boxes, arrived about 8:40 p.m. Friday. About 15 unmarked cars could be seen on the street, as well as a Montgomery County police vehicle.   Supervisory Special Agent Philip Celestini, who was at the residence, declined to comment further on the search, citing the active investigation.   Aruban Solicitor General Taco Stein said earlier Friday that the suspect will appear in court Monday, where an investigating magistrate could order him held for at least eight more days, order him to remain on the island or release him outright due to a lack of evidence.   Giordano was arrested by Arub

 89%|█████████████████████████████████████████████████████████████████████████▎        | 17/19 [00:04<00:00,  4.20it/s]

What news agency showed photos of American soldiers?Kabul, Afghanistan (CNN) -- The German news outlet Der Spiegel has published photographs of what appear to be two U.S. soldiers in Afghanistan posing over the bodies of dead Afghans -- images which threaten to further complicate the American military effort there.   Two images show the soldiers kneeling by a bloody body sprawled over a patch of sand and grass. A third shows what appears to be two bodies propped up, back to back, against a post in front of a military vehicle.   Der Spiegel identifies the soldiers as Spc. Jeremy Morlock and Pfc. Andrew Holmes, who are both facing charges relating to the wrongful deaths of Afghan civilians.   Specifically, Holmes is charged with the premeditated deaths of three civilians, possessing a dismembered human finger, wrongfully possessing photographs of human casualties, and smoking hashish.   He is also accused of conspiring with Morlock to shoot at a civilian and then toss a grenade so it wou

 95%|█████████████████████████████████████████████████████████████████████████████▋    | 18/19 [00:05<00:00,  4.12it/s]

The other?Kabul, Afghanistan (CNN) -- The German news outlet Der Spiegel has published photographs of what appear to be two U.S. soldiers in Afghanistan posing over the bodies of dead Afghans -- images which threaten to further complicate the American military effort there.   Two images show the soldiers kneeling by a bloody body sprawled over a patch of sand and grass. A third shows what appears to be two bodies propped up, back to back, against a post in front of a military vehicle.   Der Spiegel identifies the soldiers as Spc. Jeremy Morlock and Pfc. Andrew Holmes, who are both facing charges relating to the wrongful deaths of Afghan civilians.   Specifically, Holmes is charged with the premeditated deaths of three civilians, possessing a dismembered human finger, wrongfully possessing photographs of human casualties, and smoking hashish.   He is also accused of conspiring with Morlock to shoot at a civilian and then toss a grenade so it would look like the soldiers were under attac

100%|██████████████████████████████████████████████████████████████████████████████████| 19/19 [00:05<00:00,  3.48it/s]

how many people does he promote(CNN)A chiseled boxer's Instagram feed shows him making constant references to the Bible and enjoying gospel singing with his wife.   Another features his formidable opponent counting stacks of money, hanging out in strip clubs, and flashing diamond watches and Ferraris.   Welcome to the world of boxing promotion, circa 2015.   American Floyd Mayweather and Filipino Manny Pacquiao are set to officially announce their heavily anticipated boxing match at a press conference in Los Angeles Wednesday.   With the combined purse for the May 2 bout in Las Vegas reported to touch $300 million pending viewership numbers, the incentives to self-promote could not be higher.   "Nowadays you have to be on social media to launch the fight and to build hype," says boxing promoter Nisse Sauerland, CEO of Team Sauerland. "It couldn't be done without it."   Thirty-eight year old Mayweather (47-0, 26 knockouts), who favors the moniker "The Money Man" or "TBE" (The Best Ever)


