### Imports

In [1]:
from IPython.display import display_html, clear_output
from itertools import chain,cycle
from copy import deepcopy
import urllib.request
import transformers
import numpy as np
import json
import time
import os
import torch
import random 
import pandas as pd
from tqdm import tqdm

from sklearn.model_selection import GroupShuffleSplit
from datasets import *
from transformers import AutoTokenizer, PreTrainedTokenizerFast, EncoderDecoderModel, Seq2SeqTrainingArguments, Seq2SeqTrainer, AdamW, DataCollatorForSeq2Seq
#from allennlp_models.rc.tools import squad

import plotly.express as px

# Display dataframes
def display(*args,titles=cycle([''])):
    html_str=''
    for df,title in zip(args, chain(titles,cycle(['</br>'])) ):
        html_str+='<th style="text-align:left"><td style="vertical-align:top">'
        html_str+=f'<h4 style="text-align: left;">{title}</h2>'
        html_str+=df.to_html().replace('table','table style="display:inline"')
        html_str+='</td></th>'
    display_html(html_str,raw=True)
    
# Setting seeds for reproducibility
def set_reproducibility(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    transformers.set_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    os.environ['TF_DETERMINISTIC_OPS'] = '1'



In [2]:
if torch.cuda.is_available():
    device = torch.device("cuda")  # use the GPU
else:
    device = torch.device("cpu")  # use the CPU

print("Using device:", device)

Using device: cuda


### Dataset Download

In [3]:
class DownloadProgressBar(tqdm):
    def update_to(self, b=1, bsize=1, tsize=None):
        if tsize is not None:
            self.total = tsize
        self.update(b * bsize - self.n)
        
def download_url(url, output_path):
    with DownloadProgressBar(unit='B', unit_scale=True,
                             miniters=1, desc=url.split('/')[-1]) as t:
        urllib.request.urlretrieve(url, filename=output_path, reporthook=t.update_to)

def download_data(data_path, url_path, suffix):    
    if not os.path.exists(data_path):
        os.makedirs(data_path)
        
    data_path = os.path.join(data_path, f'{suffix}.json')

    if not os.path.exists(data_path):
        print(f"Downloading CoQA {suffix} data split... (it may take a while)")
        download_url(url=url_path, output_path=data_path)
        urllib.request.urlretrieve(url_path, filename=data_path)
        print("Download completed!")

In [4]:
# Train data
train_url = "https://nlp.stanford.edu/data/coqa/coqa-train-v1.0.json"
download_data(data_path='coqa', url_path=train_url, suffix='train')

# Test data
test_url = "https://nlp.stanford.edu/data/coqa/coqa-dev-v1.0.json"
download_data(data_path='coqa', url_path=test_url, suffix='test') 

### Preprocessing

In [5]:
# Creating Dataframes and removing unanswerable questions
train_data = json.load((open('coqa/train.json')))
test_data = json.load((open('coqa/test.json')))

qas = pd.json_normalize(train_data['data'], ['questions'], ['source', 'id', 'story'])
ans = pd.json_normalize(train_data['data'], ['answers'],['id'])
train_val_df = pd.merge(qas,ans, left_on=['id','turn_id'], right_on=['id','turn_id'])
train_val_df = train_val_df.loc[train_val_df['input_text_y']!='unknown']

qas = pd.json_normalize(test_data['data'], ['questions'], ['source', 'id', 'story'])
ans = pd.json_normalize(test_data['data'], ['answers'],['id'])
test_df = pd.merge(qas,ans, left_on=['id','turn_id'], right_on=['id','turn_id'])
test_df = test_df.loc[test_df['input_text_y']!='unknown']

In [6]:
# Removing bad turns
train_val_df = train_val_df.loc[(train_val_df['bad_turn_x'] != 'True') & (train_val_df['bad_turn_y'] != 'True')]

# Removing equal text/answer entries
train_val_df = train_val_df[train_val_df.story != train_val_df.input_text_y]
test_df = test_df[test_df.story != test_df.input_text_y]

# Removing enties with empty answers
train_val_df = train_val_df[train_val_df['input_text_y'].str.len()>0]
test_df = test_df[test_df['input_text_y'].str.len()>0]

In [7]:
# Text preprocess
def preprocess(ds,columns):
    ds = ds.replace(r'\n',' ', regex=True)
    ds = ds.replace(r'[^\w\s]+', '', regex=True)
    for feature in columns:
        ds[feature] = ds[feature].str.lower().str.strip()
        
    return ds

columns = ['story', 'input_text_x', 'span_text', 'input_text_y']

train_val_df = preprocess(train_val_df,columns)
test_df = preprocess(test_df,columns)

In [8]:
# Train/Validation Split
set_reproducibility(42)

train_inds, val_inds = next(GroupShuffleSplit(test_size=.20, n_splits=2, random_state = 42).split(train_val_df, groups=train_val_df['id']))

train_df = train_val_df.iloc[train_inds]
val_df = train_val_df.iloc[val_inds].reset_index()

print(train_df.columns)

Index(['input_text_x', 'turn_id', 'bad_turn_x', 'source', 'id', 'story',
       'span_start', 'span_end', 'span_text', 'input_text_y', 'bad_turn_y'],
      dtype='object')


In [9]:
# Checking the Dataframes
print(f'Training set [{train_df.shape}]')
print(f'\tFeatures: {list(train_df.columns)}')
display(train_df.loc[11:15,['id', 'input_text_x', 'input_text_y', 'span_text']])

print(f'Validation set [{val_df.shape}]')
print(f'\tFeatures: {list(val_df.columns)}')
display(val_df.loc[11:15,['id', 'input_text_x', 'input_text_y', 'span_text']])

print(f'Test set [{test_df.shape}]')
print(f'\tFeatures: {list(test_df.columns)}')
display(test_df.loc[11:15,['id', 'input_text_x', 'input_text_y', 'span_text']])

Training set [(85823, 11)]
	Features: ['input_text_x', 'turn_id', 'bad_turn_x', 'source', 'id', 'story', 'span_start', 'span_end', 'span_text', 'input_text_y', 'bad_turn_y']


Unnamed: 0,id,input_text_x,input_text_y,span_text
11,3zotghdk5ibi9cex97fepx7jetpso7,how many items are in this secret collection,150000,vatican secret archives were separated from the library at the beginning of the 17th century they contain another 150000 items
12,3zotghdk5ibi9cex97fepx7jetpso7,can anyone use this library,anyone who can document their qualifications and research needs,the vatican library is open to anyone who can document their qualifications and research needs
14,3zotghdk5ibi9cex97fepx7jetpso7,what must be requested in person or by mail,photocopies,photocopies for private study of pages from books published between 1801 and 1990 can be requested in person or by mail
15,3zotghdk5ibi9cex97fepx7jetpso7,of what books,only books published between 1801 and 1990,hotocopies for private study of pages from books published between 1801 and 1990


Validation set [(21452, 12)]
	Features: ['index', 'input_text_x', 'turn_id', 'bad_turn_x', 'source', 'id', 'story', 'span_start', 'span_end', 'span_text', 'input_text_y', 'bad_turn_y']


Unnamed: 0,id,input_text_x,input_text_y,span_text
11,3bdcf01ogxu7zdn9vlrbf2rqzwplyf,where was milly led to,cottonwoods,led milly erne to cottonwoods
12,3bdcf01ogxu7zdn9vlrbf2rqzwplyf,who took her there,a man,the man who had led milly erne to cottonwoods
13,3bdcf01ogxu7zdn9vlrbf2rqzwplyf,whose name would jane not speak,this mormons name,this mormons name
14,3bdcf01ogxu7zdn9vlrbf2rqzwplyf,did she allow herself to even think it,no,she did not even think it
15,3bdcf01ogxu7zdn9vlrbf2rqzwplyf,what was jane hoping lassiter would become to her,a helper of a friend of a champion,the need of a helper of a friend of a champio


Test set [(7917, 9)]
	Features: ['input_text_x', 'turn_id', 'source', 'id', 'story', 'span_start', 'span_end', 'span_text', 'input_text_y']


Unnamed: 0,id,input_text_x,input_text_y,span_text
11,3dr23u6we5exclen4th8uq9rb42tel,did they want cotton to change the color of her fur,no,we would never want you to be any other way
12,3azhrg4cu4ktme1zh7c2ro3pn2430d,what was the name of the fish,asta,asta
13,3azhrg4cu4ktme1zh7c2ro3pn2430d,what looked like a birds belly,a bottle,a bottle
14,3azhrg4cu4ktme1zh7c2ro3pn2430d,who said that,asta,it looks like a birds belly said asta
15,3azhrg4cu4ktme1zh7c2ro3pn2430d,was sharkie a friend,yes,astas friend sharkie


In [10]:
# Overlap Check
set_train = set(train_df['id'])
set_val = set(val_df['id'])

overlap = False
for i in set_train:
    if i in set_val:
        overlap = True
        break

print('Overlap' if overlap else 'No overlap')

No overlap


In [11]:
# Dataframes to Datasets
train_df_to_ds = train_df[columns]
val_df_to_ds = val_df[columns]
test_df_to_ds = test_df[columns]

train_df_to_ds = train_df_to_ds.rename(columns={'input_text_x': 'question', 'story': 'context',\
                                               'input_text_y': 'answer', 'span_text': 'text'})
val_df_to_ds = val_df_to_ds.rename(columns={'input_text_x': 'question', 'story': 'context',\
                                               'input_text_y': 'answer', 'span_text': 'text'})
test_df_to_ds = test_df_to_ds.rename(columns={'input_text_x': 'question', 'story': 'context',\
                                               'input_text_y': 'answer', 'span_text': 'text'})

In [12]:
# Datasets Batch split
batch_size = 6
ratio = 5

train_samples = (round(train_df_to_ds.shape[0] * ratio / 100) // batch_size) * batch_size

val_samples = (round(val_df_to_ds.shape[0] * ratio / 100) // batch_size) * batch_size
test_samples = (round(test_df_to_ds.shape[0] * ratio / 100) // batch_size) * batch_size

train_dataset = Dataset.from_dict(train_df_to_ds.iloc[:train_samples])
val_dataset = Dataset.from_dict(val_df_to_ds.iloc[:val_samples])
test_dataset = Dataset.from_dict(test_df_to_ds.iloc[:test_samples])

dataset_COQA = DatasetDict({'train':train_dataset,'validation':val_dataset,'test':test_dataset})
print(dataset_COQA)

DatasetDict({
    train: Dataset({
        features: ['context', 'question', 'text', 'answer'],
        num_rows: 4290
    })
    validation: Dataset({
        features: ['context', 'question', 'text', 'answer'],
        num_rows: 1068
    })
    test: Dataset({
        features: ['context', 'question', 'text', 'answer'],
        num_rows: 396
    })
})


In [13]:
max_length_input = 512
max_length_answer = 42

In [14]:
def prepare_features(batch, tokenizer, max_length_input, max_length_answer):
    # Tokenize the Question and Context columns
    encoded_batch_inputs = tokenizer(
        batch['question'],
        batch['context'],
        max_length=max_length_input,
        truncation='only_second',
        padding='max_length',
        return_tensors='pt'        
    )

    # Tokenize the Answer column
    encoded_batch_labels = tokenizer(
        batch['answer'],
        max_length=max_length_answer,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )
    
    encoded_batch_inputs['labels'] = encoded_batch_labels.input_ids
#   encoded_batch_inputs['decoder_input_ids'] = deepcopy(encoded_batch_inputs['labels'])
#   encoded_batch_inputs['labels'] = [[-100 if token == tokenizer.pad_token_id else token\
#                                    for token in labels]\
#                                    for labels in encoded_batch_inputs['labels']]
    
    encoded_batch_inputs['labels_mask'] = encoded_batch_labels.attention_mask


    return encoded_batch_inputs

### Tokenization

In [15]:
model_checkpoint_M1 = 'distilroberta-base'
# Tokenizer
tokenizer_M1 = AutoTokenizer.from_pretrained(model_checkpoint_M1)
assert isinstance(tokenizer_M1, PreTrainedTokenizerFast)
tokenizer_M1.bos_token = tokenizer_M1.cls_token
tokenizer_M1.eos_token = tokenizer_M1.sep_token

# Get the special tokens and their corresponding IDs
special_tokens = tokenizer_M1.special_tokens_map
special_ids = tokenizer_M1.convert_tokens_to_ids(list(special_tokens.values()))
print("Special tokens:")
for token_type, token_list in special_tokens.items():
    print(f"{token_type}: {token_list}")
# Print the special tokens and their corresponding IDs
for token, id in zip(special_tokens.keys(), special_ids):
    print(f"{token}: {id}")

Special tokens:
bos_token: <s>
eos_token: </s>
unk_token: <unk>
sep_token: </s>
pad_token: <pad>
cls_token: <s>
mask_token: <mask>
bos_token: 0
eos_token: 2
unk_token: 3
sep_token: 2
pad_token: 1
cls_token: 0
mask_token: 50264


In [16]:
# Tokenizing the Dataset
tokenized_datasets_M1 = DatasetDict()

# Use the `prepare_features` functions
tokenized_datasets_M1 = dataset_COQA.map(
    lambda batch: prepare_features(batch, tokenizer_M1, max_length_input, max_length_answer),
    batched=True,
    batch_size=batch_size,
    remove_columns=dataset_COQA['train'].column_names
)

print(tokenized_datasets_M1)

  0%|          | 0/715 [00:00<?, ?ba/s]

  0%|          | 0/178 [00:00<?, ?ba/s]

  0%|          | 0/66 [00:00<?, ?ba/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels', 'labels_mask'],
        num_rows: 4290
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels', 'labels_mask'],
        num_rows: 1068
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels', 'labels_mask'],
        num_rows: 396
    })
})


### Model definition

In [17]:
# Load Model
model_M1 = EncoderDecoderModel.from_encoder_decoder_pretrained(model_checkpoint_M1, model_checkpoint_M1, tie_encoder_decoder=False)

# Model special tokens
model_M1.config.decoder_start_token_id = tokenizer_M1.cls_token_id
model_M1.config_eos_token_id = tokenizer_M1.sep_token_id
model_M1.config.pad_token_id = tokenizer_M1.pad_token_id
model_M1.config.vocab_size = model_M1.config.encoder.vocab_size

# Model hyperparams
model_M1.config.max_length = max_length_answer
model_M1.config.min_length = 1
model_M1.config.no_repeat_ngram_size = 1
model_M1.config.early_stopping = True
model_M1.config.repetition_penalty= 3.
model_M1.config.num_beams = 8

print(f"Parameters #: {model_M1.num_parameters()}")

model_M1.to(device)

Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForCausalLM were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['roberta.encoder.layer.5.crossattention.self.key.weight', 'roberta.encoder.layer.1.crossattention.output.dense.bias', 'roberta.encoder.layer.1.crossattention.s

Parameters #: 178472025


EncoderDecoderModel(
  (encoder): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-5): 6 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): L

### Training

In [18]:
training_args_M1 = Seq2SeqTrainingArguments(
    output_dir='./M1_Checkpoints',
    evaluation_strategy="epoch",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    predict_with_generate=True,
    overwrite_output_dir=True,
    #save_total_limit=2,
    fp16=True, 
    num_train_epochs = 3,
    weight_decay=0.01,
    logging_steps=10
    #resume_from_checkpoint = True
)

# Optimizer and scheduler
optimizer_M1 = AdamW(model_M1.parameters(),lr= 3e-5)
scheduler_M1 = transformers.get_cosine_schedule_with_warmup(optimizer=optimizer_M1,num_warmup_steps=50,num_training_steps=batch_size*3)
optimizers_M1 = optimizer_M1, scheduler_M1

trainer_M1 = Seq2SeqTrainer(
    model=model_M1,
    tokenizer=tokenizer_M1,
    args=training_args_M1,
    #compute_metrics=compute_metrics,
    train_dataset=tokenized_datasets_M1['train'],
    eval_dataset=tokenized_datasets_M1['validation'],
    optimizers=optimizers_M1,
    data_collator=DataCollatorForSeq2Seq(tokenizer_M1,model=model_M1)
)



Using cuda_amp half precision backend


In [19]:
os.environ["WANDB_DISABLED"] = "true"

trainer_M1.train()

The following columns in the training set don't have a corresponding argument in `EncoderDecoderModel.forward` and have been ignored: labels_mask. If labels_mask are not expected by `EncoderDecoderModel.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 4290
  Num Epochs = 3
  Instantaneous batch size per device = 6
  Total train batch size (w. parallel, distributed & accumulation) = 6
  Gradient Accumulation steps = 1
  Total optimization steps = 2145
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




Epoch,Training Loss,Validation Loss
1,0.8974,0.61904
2,0.6171,0.59111
3,0.5292,0.585791


Saving model checkpoint to ./M1_Checkpoints\checkpoint-500
Configuration saved in ./M1_Checkpoints\checkpoint-500\config.json
Model weights saved in ./M1_Checkpoints\checkpoint-500\pytorch_model.bin
tokenizer config file saved in ./M1_Checkpoints\checkpoint-500\tokenizer_config.json
Special tokens file saved in ./M1_Checkpoints\checkpoint-500\special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `EncoderDecoderModel.forward` and have been ignored: labels_mask. If labels_mask are not expected by `EncoderDecoderModel.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1068
  Batch size = 6
Saving model checkpoint to ./M1_Checkpoints\checkpoint-1000
Configuration saved in ./M1_Checkpoints\checkpoint-1000\config.json
Model weights saved in ./M1_Checkpoints\checkpoint-1000\pytorch_model.bin
tokenizer config file saved in ./M1_Checkpoints\checkpoint-1000\tokenizer_config.json
Special tokens file s

TrainOutput(global_step=2145, training_loss=0.857137624922888, metrics={'train_runtime': 374.3601, 'train_samples_per_second': 34.379, 'train_steps_per_second': 5.73, 'total_flos': 3972402192844800.0, 'train_loss': 0.857137624922888, 'epoch': 3.0})

In [26]:
tokenized_datasets_M1['test']

Dataset({
    features: ['input_ids', 'attention_mask', 'labels', 'labels_mask'],
    num_rows: 396
})

In [30]:
# Initialize the data collator
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer_M1, model=model_M1)

# Create a DataLoader for your dataset using the data collator
test_loader = torch.utils.data.DataLoader(tokenized_datasets_M1['test'], 
                                          batch_size=batch_size, 
                                          collate_fn=data_collator)
for batch in tqdm(test_loader):
    example = batch['input_ids'].to(device)
    att_mask = batch['attention_mask'].to(device)
    generated_ids = model_M1.generate(input_ids=example, 
                                      attention_mask=att_mask,
                                      max_length=max_length_answer)
    generated_answers = tokenizer_M1.batch_decode(generated_ids, skip_special_tokens=True)
    print(f'Generated ans: {generated_answers}')
    true = batch["labels"]
    ground_truth = tokenizer_M1.batch_decode(true, skip_special_tokens=True)
    print(f'True ans: {ground_truth}')
    
    



  2%|█▎                                                                                 | 1/66 [00:00<00:39,  1.63it/s]

Generated ans: ['he it was to a the his him', 'he he was to a the him', 'he it was to a the his him', 'he he was to a the him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['white', 'in a barn', 'no', 'with her mommy and 5 sisters', 'orange and white', 'no']


  3%|██▌                                                                                | 2/66 [00:00<00:26,  2.43it/s]

Generated ans: ['he it was to a the his him', 'he he was to a the him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['she painted herself', 'the farmer', 'they started laughing', 'a bucket of water', 'licked her face', 'no']


  5%|███▊                                                                               | 3/66 [00:01<00:21,  2.90it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['asta', 'a bottle', 'asta', 'yes', 'yes', 'a note']


  6%|█████                                                                              | 4/66 [00:01<00:19,  3.20it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he he was to a the him']
True ans: ['no', 'astas papa', 'yes', 'an elderly chinese lady and a little boy', 'yes', 'a paper carrier bag']


  8%|██████▎                                                                            | 5/66 [00:01<00:18,  3.37it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he he was to a the him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['yes', 'nicole', 'shanghai', 'mother', 'food', 'yes']


  9%|███████▌                                                                           | 6/66 [00:01<00:17,  3.50it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['i am having heart surgery soon so her mother has decided i need more nutrients', 'an ipad', 'hot soup and a container with rice vegetables and either chicken meat or shrimp sometimes with a kind of pancake', 'i am now working on some more chinese words', 'yes', 'thank you']


 11%|████████▊                                                                          | 7/66 [00:02<00:16,  3.53it/s]

Generated ans: ['he was a the one to his her', 'he was a the one to his her', 'he was a the one to his her', 'he was a the one to his her', 'he was a the one to his her', 'he was a the one to his her']
True ans: ['yes', 'dennis farina', 'actor', 'no', 'yes', 'no']


 12%|██████████                                                                         | 8/66 [00:02<00:16,  3.52it/s]

Generated ans: ['he was a the one to his her', 'he was a the one to his her', 'he was a the one to his her', 'he was a the one to his her', 'he was a the one to his her', 'he was a the one to his her']
True ans: ['farina was cast in a film', 'michael mann', 'thief', 'cops or gangsters', 'he joined a tv show cast', 'law  order']


 14%|███████████▎                                                                       | 9/66 [00:02<00:16,  3.54it/s]

Generated ans: ['he was a the one to his her', 'he was a the one to his her', 'he was a the one to his her', 'he was a the one to his her', 'he was a the one to his her', 'he was a the one to his her']
True ans: ['detective joe fontana', 'no', 'an expensive car', 'no', 'flashy', 'no']


 15%|████████████▍                                                                     | 10/66 [00:03<00:15,  3.61it/s]

Generated ans: ['he was a the one to his her', 'he was a the one to his her', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['no', 'a cop', 'school', 'no', 'go to quentins house', 'no']


 17%|█████████████▋                                                                    | 11/66 [00:03<00:15,  3.65it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['no', 'story time', 'right before bedtime', 'no one answered', 'no', 'no']


 18%|██████████████▉                                                                   | 12/66 [00:03<00:14,  3.67it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['that she was upset', 'yes', 'everything would be okay', 'her teacher', 'no', 'quintons mother']


 20%|████████████████▏                                                                 | 13/66 [00:03<00:14,  3.68it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he was a the east of west', 'he was a the east of west', 'he was a the east of west', 'he was a the east of west']
True ans: ['to the dentist', 'yes', 'five', 'new york city', 'new york', 'yes']


 21%|█████████████████▍                                                                | 14/66 [00:04<00:13,  3.78it/s]

Generated ans: ['he was a the east of west', 'he was a the east of west', 'he was a the east of west', 'he was a the east of west', 'he was a the east of west', 'he was a the east of west']
True ans: ['in the southwest of the city', 'arthur kill and the kill van kull', '476015', 'no', 'nonhispanic white', 'the forgotten borough']


 23%|██████████████████▋                                                               | 15/66 [00:04<00:13,  3.79it/s]

Generated ans: ['he was a the east of west', 'he was a the east of west', 'he was a the east of west', 'he he was to the a his him', 'he he was to the a his him', 'he he was to the a his him']
True ans: ['because the inhabitants feel neglected by the city government', 'north shore', 'st george tompkinsville clifton and stapleton', 'five in the morning', 'weather forecast', 'yes']


 24%|███████████████████▉                                                              | 16/66 [00:04<00:13,  3.78it/s]

Generated ans: ['he he was to his a the him', 'he he was to the a his him', 'he he was to the a his him', 'he he was to his a the him', 'he he was to his a the him', 'he he was to the a his him']
True ans: ['firefighter', 'yes', 'flashlight', 'rj', 'joel', 'glass wood plaster and maybe the washing machine']


 26%|█████████████████████                                                             | 17/66 [00:04<00:13,  3.62it/s]

Generated ans: ['he he was to the a his him', 'he he was to his a the him', 'he he was to his a the him', 'he was a the east of her to his their', 'he was a the west of her to his their', 'he was a the west of her to his their']
True ans: ['no', 'eppes', 'the flashlight', 'gary giordano', 'gaithersburg', 'montgomery county']


 27%|██████████████████████▎                                                           | 18/66 [00:05<00:13,  3.52it/s]

Generated ans: ['he was a the east of her to his their', 'he was a the west of her to his their', 'he was a the her to his one', 'he was a the her to his one', 'he was a the east of her to his their', 'he was a the east of her to his their']
True ans: ['maryland', 'aruban jail', 'suspect in the recent disappearance of an american woman', 'fbi', '15', 'aruban solicitor general taco stein']


 29%|███████████████████████▌                                                          | 19/66 [00:05<00:13,  3.45it/s]

Generated ans: ['he was a the east of her to his their', 'he was a the her to his one', 'he was a the west of her to his their', 'he was a the west of her to his their', 'he was a the her to his one', 'he was a the west of her to his their']
True ans: ['monday', 'at least eight more days', 'robyn gardne', 'ast seen near baby beach', 'snorkeling', 'giordano']


 30%|████████████████████████▊                                                         | 20/66 [00:05<00:13,  3.53it/s]

Generated ans: ['he was a the her to his one', 'he was a the her to his one', 'he was a the her to his one', 'he was a the her to his one', 'he was a the her to his one', 'he it was to the a one']
True ans: ['no gardner was nowhere to be found', 'locals say is not a popular snorkeling spot', '50', 'august 5', '2 giordano told authorities that he had been snorkeling with gardner', 'great britain']


 32%|██████████████████████████                                                        | 21/66 [00:06<00:12,  3.59it/s]

Generated ans: ['he it was to his a the her', 'he it was to the a one', 'he it was to the a one', 'he it was to the a one', 'he it was to the a one', 'he it was to a his the her']
True ans: ['india', 'may be 30 feet tall', 'prune it', 'may prevent heart disease', 'by accident', 'shen nong']


 33%|███████████████████████████▎                                                      | 22/66 [00:06<00:12,  3.45it/s]

Generated ans: ['he it was to a his the her', 'he it was to his a the her', 'he it was to a the one of his her', 'he it was to a the one of his her', 'he it was to a the his her of one', 'he it was to a the his her of one']
True ans: ['about 2737 bc', 'yes', 'der spiegel', 'germany', 'posing over the bodies of dead afghans', 'bloody']


 35%|████████████████████████████▌                                                     | 23/66 [00:06<00:12,  3.32it/s]

Generated ans: ['he it was to a the his her of one', 'he it was to a the one of his her', 'he it was to a the his her of one', 'he it was to a the his her of one', 'he it was to a the one of his her', 'he it was to a the one of his her']
True ans: ['propped up back to back', 'military vehicle', 'taking or retaining individual souvenirs or trophies', 'jeremy morlock', 'pfc andrew holmes', 'holmes is charged with the premeditated deaths of three civilians']


 36%|█████████████████████████████▊                                                    | 24/66 [00:07<00:12,  3.33it/s]

Generated ans: ['he was a the one to her in his', 'he it was is to a the one of her', 'he it was is to a the one of her', 'he was a the one to her in his', 'he it was is to a the one of her', 'he it was is to a the one of her']
True ans: ['floyd mayweather and manny pacquiao', '1 is the money man', 'tbe', 'the best ever', 'the money team', 'a boxing promoter']


 38%|███████████████████████████████                                                   | 25/66 [00:07<00:12,  3.34it/s]

Generated ans: ['he it was is to a the one of her', 'he it was is to a the one of her', 'he it was is to a the one of her', 'he it was is to a the one of her', 'paper of the a information to their food', 'paper of the a research and computer']
True ans: ['over 45 boxers', '300 million pending viewership numbers', '38', 'just that it has bible references and shows him enjoying gose singing with his wife', 'oclc', 'online computer library center']


 39%|████████████████████████████████▎                                                 | 26/66 [00:07<00:11,  3.46it/s]

Generated ans: ['paper of the a information to their food', 'paper of the a information to their food', 'paper of the a twenty century', 'paper of the a information to their food', 'paper of the a research and computer', 'paper of the a research and computer']
True ans: ['1967', 'yes', 'ohio', 'ohio state university', 'frederick g kilgour', 'he is not']


 41%|█████████████████████████████████▌                                                | 27/66 [00:07<00:11,  3.48it/s]

Generated ans: ['paper of the a research and computer', 'paper of the a information to their food', 'paper of the a information to their food', 'paper of the a twenty century', 'paper of the a information to their 2020', 'paper of the a twenty century']
True ans: ['medical school librarian', 'worldcat', 'july 5 1967', 'ohio state university', 'alden library', 'ohio university']


 42%|██████████████████████████████████▊                                               | 28/66 [00:08<00:10,  3.56it/s]

Generated ans: ['paper of the a information to their food', 'paper of the a research and computer', 'paper of the a research and computer', 'he heard to a the his her', 'he heard to a the her', 'he heard to a the her']
True ans: ['online cataloging', 'august 26 1971', 'no', 'no', 'they bought flowers', 'its 15']


 44%|████████████████████████████████████                                              | 29/66 [00:08<00:10,  3.66it/s]

Generated ans: ['he heard to a the his her', 'he heard to a the her', 'he heard to a the her', 'he heard to a the her', 'he heard to a the her', 'he heard to a the her']
True ans: ['no', 'it doesnt look good', 'summer', '15', 'no', 'a pen']


 45%|█████████████████████████████████████▎                                            | 30/66 [00:08<00:09,  3.62it/s]

Generated ans: ['he heard to a the his her', 'he heard to a the her', 'he heard to a the her', 'he he was to a the his him', 'he he was to his a the him', 'he he was to a the his him']
True ans: ['she already has two blouses', 'mothers birthday', 'at least 500', 'by a big lake by the woods', 'mice', 'toy boats']


 47%|██████████████████████████████████████▌                                           | 31/66 [00:08<00:09,  3.67it/s]

Generated ans: ['he he was to a the his him', 'he he was to his a the him', 'he he was to a the his him', 'he he was to a the his him', 'he he was to a the his him', 'he he was to his a the him']
True ans: ['yes', 'mary and steve', 'his house', 'climbed on', 'swimming and splashing', 'threw a ball into the water']


 48%|███████████████████████████████████████▊                                          | 32/66 [00:09<00:09,  3.60it/s]

Generated ans: ['he he was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his her', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['got very wet', 'the hospital had been bombed', 'no', 'germany', 'eastern germany at the time of his hospital stay', 'western germany']


 50%|█████████████████████████████████████████                                         | 33/66 [00:09<00:09,  3.63it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['no', 'yes', 'no', 'no just guessed', 'hans settled down in a village fifty miles away', 'yes for twenty years']


 52%|██████████████████████████████████████████▏                                       | 34/66 [00:09<00:08,  3.64it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['a workman', 'yes', 'hans bussman', 'yes franz does', 'no', 'he assumed hans was dead']


 53%|███████████████████████████████████████████▍                                      | 35/66 [00:10<00:08,  3.66it/s]

Generated ans: ['he he was to a his the her', 'he it was to a the his him', 'he it was to a the his him', 'he he was to a his the her', 'he he was to a his the her', 'he he was to a his the her']
True ans: ['mrs bussman', 'franz laughed at the idea', 'no', 'the _ariel_', 'lagoon', 'no']


 55%|████████████████████████████████████████████▋                                     | 36/66 [00:10<00:09,  3.30it/s]

Generated ans: ['he he was to a his the her', 'he he was to a his the her', 'he he was to a his the her', 'he he was to a his the her', 'he he was to a his the her', 'he he was to a his the her']
True ans: ['winters', 'no', 'no', 'malaita', 'harley kennan', 'villa']


 56%|█████████████████████████████████████████████▉                                    | 37/66 [00:10<00:08,  3.40it/s]

Generated ans: ['he he was to a the his her', 'he he was to a the his her', 'he he was to a his the her', 'he he was to a his the her', 'he he was to a his the her', 'he he was to a his the her']
True ans: ['the arangi', 'until they get back to tulagi', 'harley kennan', 'no', 'mrs riggs', 'topsy']


 58%|███████████████████████████████████████████████▏                                  | 38/66 [00:10<00:08,  3.38it/s]

Generated ans: ['he he was to a his the her', 'he he was to a his the her', 'he he was to a his the her', 'he he was to a his the her', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['mademoiselle de maupin', 'no', 'yes', 'every walled inlet of the outer reef and every mangrove swamp of the mainland that looked promising of cannibal life', 'brownie and spotty', 'every day']


 59%|████████████████████████████████████████████████▍                                 | 39/66 [00:11<00:08,  3.12it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['loved each other', 'worn a path through the grass of the field', 'ted', 'brownie', 'yes', 'yes']


 61%|█████████████████████████████████████████████████▋                                | 40/66 [00:11<00:08,  3.18it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['a spot half a mile from the house', 'brought him food', 'protect him from other dangers', 'keep his spirits up', 'yes', 'spotty followed ted about barking insistently']


 62%|██████████████████████████████████████████████████▉                               | 41/66 [00:12<00:08,  3.04it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['yes', 'yes', 'no', 'yes they went looking for him with no success', 'no', 'they were busy with their own lives']


 64%|████████████████████████████████████████████████████▏                             | 42/66 [00:12<00:07,  3.06it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['follow me its urgent', 'a girl and a dog', 'set on on a trip', 'the woods', 'scared', 'he wasnt']


 65%|█████████████████████████████████████████████████████▍                            | 43/66 [00:12<00:07,  3.12it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['he was interested', 'what was in the bushes', 'a bear', 'rested in the bushes', 'not really', 'surprised']


 67%|██████████████████████████████████████████████████████▋                           | 44/66 [00:12<00:07,  3.14it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['not surprised', 'looked at the girl', 'he smiled', 'no', 'no one', 'no']


 68%|███████████████████████████████████████████████████████▉                          | 45/66 [00:13<00:06,  3.24it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to his a the her', 'he it was to a the him', 'he it was to his a the her']
True ans: ['dark and cold', 'yes', 'no', '10yearold boy fatally shot his father', 'in the front seat of a suv', 'no']


 70%|█████████████████████████████████████████████████████████▏                        | 46/66 [00:13<00:06,  3.06it/s]

Generated ans: ['he it was to his a the her', 'he it was to his a the her', 'he it was to a the him', 'he it was to a the him', 'he it was to his a the her', 'he it was to his a the her']
True ans: ['outside the home of lohstrohs exwife', 'friday', '3pm', 'belonged to the boys mother', 'he exited the back of the vehicle and continued to fire at the car', 'no']


 71%|██████████████████████████████████████████████████████████▍                       | 47/66 [00:13<00:06,  3.14it/s]

Generated ans: ['he it was to a the him', 'he it was to his a the her', 'he it was to his a the her', 'he it was to a the him', 'he it was to the her of a his', 'he it was to the her of a his']
True ans: ['at the university of texas medical branch', 'inside the house', 'yes', 'the 7yearold', 'fra girolamo', 'no']


 73%|███████████████████████████████████████████████████████████▋                      | 48/66 [00:14<00:05,  3.27it/s]

Generated ans: ['he it was to the her of a his', 'he it was to the her of a his', 'he it was to the her of a his', 'he it was to the her of a his', 'he it was to the her of a his', 'he it was to the her of a his']
True ans: ['no', 'romola', 'in the duomo', 'june', 'for some weeks', 'a sign from baldassarre']


 74%|████████████████████████████████████████████████████████████▉                     | 49/66 [00:14<00:05,  3.15it/s]

Generated ans: ['he it was to the her of a his', 'he it was to the her of a his', 'he it was to the her of a his', 'he it was to the her of a his', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['sympathy with savonarola', 'plague', 'the frate', 'no', 'yes', 'to utah']


 76%|██████████████████████████████████████████████████████████████                    | 50/66 [00:14<00:05,  3.12it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['in seattle', 'no', 'in a small apartment', 'no', 'her friends', 'yes']


 77%|███████████████████████████████████████████████████████████████▎                  | 51/66 [00:15<00:04,  3.18it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['two', 'the large truck', 'jennys mom', 'yes', 'yummy fast food', 'she loved it']


 79%|████████████████████████████████████████████████████████████████▌                 | 52/66 [00:15<00:04,  3.29it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the one of her', 'he it was to a the one of her']
True ans: ['a knock at the door', 'a little girl', 'to play with jenny', 'she liked it', 'she  fell on a beginners slope', 'skiing']


 80%|█████████████████████████████████████████████████████████████████▊                | 53/66 [00:15<00:03,  3.36it/s]

Generated ans: ['he was a the one to his her', 'he it was to a the one of her', 'he it was to a the one of her', 'he it was to a the one of her', 'he it was to a the one of her', 'he it was to a the one of her']
True ans: ['canada', 'yes', 'she did not', 'about an hour', 'she did not show signs', 'a local hospital']


 82%|███████████████████████████████████████████████████████████████████               | 54/66 [00:16<00:03,  3.42it/s]

Generated ans: ['he was a the one to his her', 'he it was to a the one of her', 'he it was to a the one of her', 'he it was to a the one of her', 'he it was to a the one of her', 'he it was to a the one of her']
True ans: ['hopital du sacrecoeur', 'new york city', 'she was 45', 'a film star', 'yes', 'liam neeson']


 83%|████████████████████████████████████████████████████████████████████▎             | 55/66 [00:16<00:03,  3.42it/s]

Generated ans: ['he it was to a the one of her', 'he it was to a the one of her', 'he it was to a the one of her', 'he it was to a the one of her', 'he it was to a the one of her', 'he it was to a the one of her']
True ans: ['yes', 'sons', 'yes', 'tony', 'yes', 'acting']


 85%|█████████████████████████████████████████████████████████████████████▌            | 56/66 [00:16<00:02,  3.49it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['bark', 'three months', 'no', 'sammie', 'golden puppy', 'no']


 86%|██████████████████████████████████████████████████████████████████████▊           | 57/66 [00:16<00:02,  3.53it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['tired', 'no', 'peter', 'find a person', 'no', 'sleep']


 88%|████████████████████████████████████████████████████████████████████████          | 58/66 [00:17<00:02,  3.59it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['an alien dog', 'false', 'three', 'ti dicky and cj7', 'no', 'a movie']


 89%|█████████████████████████████████████████████████████████████████████████▎        | 59/66 [00:17<00:02,  3.47it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['no', 'china', 'ti and his son', 'no', 'a doll', 'false']


 91%|██████████████████████████████████████████████████████████████████████████▌       | 60/66 [00:17<00:01,  3.44it/s]

Generated ans: ['he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him', 'he it was to a the his him']
True ans: ['around his neck', 'no', 'it can talk and do magic', 'no', 'he found it in the trash', 'january']


 92%|███████████████████████████████████████████████████████████████████████████▊      | 61/66 [00:18<00:01,  3.18it/s]

Generated ans: ['he it was to a the his him', 'he it was to his a the her', 'he it was to his a the her', 'he it was to his a the her', 'he it was to his a the her', 'he it was to his a the her']
True ans: ['2008', 'artsandcrafts', 'recipes', 'heather neroy', 'southern california', 'shes a stayathome mom']


 94%|█████████████████████████████████████████████████████████████████████████████     | 62/66 [00:18<00:01,  3.02it/s]

Generated ans: ['he it was to his a the her', 'he it was to his a the her', 'he it was to his a the her', 'he it was to his a the her', 'he it was to his a the her', 'he it was to his a the her']
True ans: ['by copying the link', 'emailing it to herself', 'no', 'pinterest', 'no', 'yes']


 95%|██████████████████████████████████████████████████████████████████████████████▎   | 63/66 [00:18<00:00,  3.06it/s]

Generated ans: ['he it was to his a the her', 'he it was to his a the her', 'he it was to his a the her', 'he it was to his a the her', 'he it was to his a the her', 'he it was to his a the her']
True ans: ['the filing system', 'a halloween board', 'a shared color board', 'redecorating her daughters bedroom', 'follow others boards', 'repin another persons images']


 97%|███████████████████████████████████████████████████████████████████████████████▌  | 64/66 [00:19<00:00,  3.08it/s]

Generated ans: ['he it was to his a the her', 'he it was to his a the her', 'he it was to his a the her', 'he he was to his a the him', 'he he was to his a the him', 'he he was to his a the him']
True ans: ['yes', 'as neat', 'yes', 'sir earl', 'archies traces', 'makes him impatient to go forward']


 98%|████████████████████████████████████████████████████████████████████████████████▊ | 65/66 [00:19<00:00,  3.23it/s]

Generated ans: ['he he was to his a the him', 'he he was to his a the him', 'he he was to his a the him', 'he he was to his a the him', 'he he was to his a the him', 'he he was to his a the him']
True ans: ['yes', 'orders', '3 days', 'search for bruce', 'with the hound with the earl and a large party of menatarms', 'a traitor']


100%|██████████████████████████████████████████████████████████████████████████████████| 66/66 [00:19<00:00,  3.36it/s]

Generated ans: ['he he was to his a the him', 'he he was to his a the him', 'he he was to his a the him', 'he he was to his a the him', 'he he was to his a the him', 'he he was to his a the him']
True ans: ['where bruce slept', 'reluctant', 'hector', 'yes', 'no', 'by foot']



