In [1]:
# Importing libraries
import json
import pandas as pd
import os
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
import os
from configuration import Configuration
from configuration import CONSTANTS as C
# Importing the T5 modules from huggingface/transformers
from transformers import T5Tokenizer
from model_t5 import T5ForConditionalGeneration
from nltk.translate.bleu_score import sentence_bleu,SmoothingFunction
from rich.table import Column, Table
from rich import box
from rich.console import Console
from tensorboardX import SummaryWriter
from torch.nn import CrossEntropyLoss
import time
from torch import cuda

In [2]:


class YourDataSetClass(Dataset):
    """
    Creating a custom dataset for reading the dataset and 
    loading it into the dataloader to pass it to the neural network for finetuning the model
    """    
    def __init__(self, dataframe, tokenizer, source_len, target_len,answer_len, source_text, target_text,answer_text):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.source_len = source_len
        self.summ_len = target_len
        self.ans_len = answer_len
        self.target_text = self.data[target_text]
        self.source_text = self.data[source_text]
        self.answer_text = self.data[answer_text]

    def __len__(self):
        return len(self.target_text)

    def __getitem__(self, index):
        source_text = str(self.source_text[index])
        target_text = str(self.target_text[index])
        answer_text = str(self.answer_text[index])
        #cleaning data so as to ensure data is in string type
        source_text = ' '.join(source_text.split())
        target_text = ' '.join(target_text.split())
        answer_text = ' '.join(answer_text.split())
        source = self.tokenizer.batch_encode_plus([source_text], max_length= self.source_len, pad_to_max_length=True, truncation=True, padding="max_length", return_tensors='pt')
        target = self.tokenizer.batch_encode_plus([target_text], max_length= self.summ_len, pad_to_max_length=True, truncation=True, padding="max_length", return_tensors='pt')
        answer = self.tokenizer.batch_encode_plus([answer_text], max_length= self.ans_len, pad_to_max_length=True, truncation=True, padding="max_length", return_tensors='pt')
        source_ids = source['input_ids'].squeeze()
        source_mask = source['attention_mask'].squeeze()
        target_ids = target['input_ids'].squeeze()
        target_mask = target['attention_mask'].squeeze()
        answer_ids = answer['input_ids'].squeeze()
        answer_mask = answer['attention_mask'].squeeze()
        return {
            'source_ids': source_ids.to(dtype=torch.long), 
            'source_mask': source_mask.to(dtype=torch.long), 
            'target_ids': target_ids.to(dtype=torch.long),
            'target_ids_y': target_ids.to(dtype=torch.long),
            'answer_ids': answer_ids.to(dtype=torch.long),
            'answer_mask': answer_mask.to(dtype=torch.long)
        }


def create_model_dir(experiment_main_dir, experiment_id, model_summary):
    """
    Create a new model directory.
    :param experiment_main_dir: Where all experiments are stored.
    :param experiment_id: The ID of this experiment.
    :param model_summary: A summary string of the model.
    :return: A directory where we can store model logs. Raises an exception if the model directory already exists.
    """
    model_name = "{}-{}".format(experiment_id, model_summary)
    model_dir = os.path.join(experiment_main_dir, model_name)
    if os.path.exists(model_dir):
        raise ValueError("Model directory already exists {}".format(model_dir))
    os.makedirs(model_dir)
    return model_dir

def train(epoch, tokenizer, model, device, loader, optimizer,writer,global_step,records,model_dir):

    """
    Function to be called for training with the parameters passed from main function

    """
    model.train()
    c=0
    for _,data in enumerate(loader, 0):
        c=c+1
        y = data['target_ids'].to(device, dtype = torch.long)
        y_ids = y[:, :-1].contiguous()
        lm_labels = y[:, 1:].clone().detach()
        lm_labels[y[:, 1:] == tokenizer.pad_token_id] = -100
        ids = data['source_ids'].to(device, dtype = torch.long)
        mask = data['source_mask'].to(device, dtype = torch.long)
        
        ans_str = data['answer_ids'].to(device, dtype = torch.long)
        ans_mask = data['answer_mask'].to(device, dtype = torch.long)
        
        outputs = model(input_ids = ids, attention_mask = mask, decoder_input_ids=y_ids,
                        labels=lm_labels,answer_str=ans_str,answer_mask=ans_mask,tokenizer=tokenizer,c=c)
        loss = outputs[0]
        
        #print("preds",outputs["pred_ids"])
        #preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in outputs["pred_ids"]]
        #print(preds)
        #print("ans",outputs["ans_ids"])
        #an = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in outputs["ans_ids"]]
        #print(an)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        writer.add_scalar("loss", loss, global_step)
        
        
        ### measure bleu
        if c%10==0:
            model.eval()
            predictions = []
            actuals = []
            num_dist=[]
            ##outputs = model.generate(input_ids=input_ids, num_beams=5, num_return_sequences=3)
            generated_ids = model.generate(
              input_ids = ids,
              attention_mask = mask, 
              max_length=150, 
              num_beams=3,
              repetition_penalty=2.5, 
              length_penalty=1.0, 
              early_stopping=True,
            num_return_sequences=3,
              )
            print(generated_ids.shape)
            print(generated_ids)
            preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in generated_ids]
            target = [tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=False)for t in y]
            print(preds)
            print(target)
            predictions.extend(preds)
            for tt in target:
                print(tt)
                actuals.extend([tt,tt,tt])
                num_dist.extend([1,2,3])
                print("actualslen",len(actuals))
                print(actuals)
            print(len(actuals))
            print(len(predictions))
            print(num_dist)
            temp_df = pd.DataFrame({'Generated Text':predictions,'Actual Text':actuals,'Num distractor':num_dist})
            print(temp_df.head())
            val=records.rename(columns={'distractor':'Actual Text'})

            gen_dist=val.merge(temp_df,on=['Actual Text']).loc[:,['text','Generated Text','Num distractor']]

            distractors=val.groupby(['text']).agg({ 'Actual Text': lambda x: list(x.str.split())}).reset_index()

            dist_compare=distractors.merge(gen_dist,on=['text'])
            dist_compare['Generated Text']=dist_compare['Generated Text'].str.split()
            dist_compare=dist_compare.assign(bleu1=dist_compare.apply(lambda x:sentence_bleu(x['Actual Text'],x['Generated Text'],weights=(1, 0, 0, 0),smoothing_function=SmoothingFunction().method1),axis=1))
            dist_compare=dist_compare.assign(bleu2=dist_compare.apply(lambda x:sentence_bleu(x['Actual Text'],x['Generated Text'],weights=(0, 1, 0, 0),smoothing_function=SmoothingFunction().method1),axis=1))
            dist_compare=dist_compare.assign(bleu3=dist_compare.apply(lambda x:sentence_bleu(x['Actual Text'],x['Generated Text'],weights=(0, 0, 1, 0),smoothing_function=SmoothingFunction().method1),axis=1))
            dist_compare=dist_compare.assign(bleu4=dist_compare.apply(lambda x:sentence_bleu(x['Actual Text'],x['Generated Text'],weights=(0, 0, 0, 1),smoothing_function=SmoothingFunction().method1),axis=1))
            
            for i in range(1,4):
                bleu_1=dist_compare.loc[dist_compare['Num distractor']==i].bleu1.mean()
                bleu_2=dist_compare.loc[dist_compare['Num distractor']==i].bleu2.mean()
                bleu_3=dist_compare.loc[dist_compare['Num distractor']==i].bleu3.mean()
                bleu_4=dist_compare.loc[dist_compare['Num distractor']==i].bleu4.mean()
                writer.add_scalar('bleu/distractor_{}/bleu_1'.format(i), bleu_1, global_step)
                writer.add_scalar('bleu/distractor_{}/bleu_2'.format(i), bleu_2, global_step)
                writer.add_scalar('bleu/distractor_{}/bleu_3'.format(i), bleu_3, global_step)
                writer.add_scalar('bleu/distractor_{}/bleu_4'.format(i), bleu_4, global_step)
            
            
            bleu_1=dist_compare.bleu1.mean()
            bleu_2=dist_compare.bleu2.mean()
            bleu_3=dist_compare.bleu3.mean()
            bleu_4=dist_compare.bleu4.mean()
            writer.add_scalar("bleu/distractor_gen/bleu_1", bleu_1, global_step)
            writer.add_scalar("bleu/distractor_gen/bleu_2", bleu_2, global_step)
            writer.add_scalar("bleu/distractor_gen/bleu_3", bleu_3, global_step)
            writer.add_scalar("bleu/distractor_gen/bleu_4", bleu_4, global_step)
            
            if c%1000==0:
                path = os.path.join(model_dir, "model_files")
                model.save_pretrained(path)
                tokenizer.save_pretrained(path)

            model.train()

        
        
        global_step += 1
    return global_step


def validate(epoch, tokenizer, model, device, loader,writer):

    """
    Function to evaluate model for predictions

    """
    global_step = 0
    model.eval()
    predictions = []
    actuals = []
    num_dist=[]
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            y = data['target_ids'].to(device, dtype = torch.long)
            ids = data['source_ids'].to(device, dtype = torch.long)
            mask = data['source_mask'].to(device, dtype = torch.long)
            generated_ids = model.generate(
              input_ids = ids,
              attention_mask = mask, 
              max_length=150, 
              num_beams=3,
              repetition_penalty=2.5, 
              length_penalty=1.0, 
              early_stopping=True,
                num_return_sequences=3,
              )
            preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in generated_ids]
            target = [tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=False)for t in y]
            predictions.extend(preds)
            for tt in target:
                actuals.extend([tt,tt,tt])
                num_dist.extend([1,2,3])

        temp_df = pd.DataFrame({'Generated Text':predictions,'Actual Text':actuals,'Num distractor':num_dist})
        val=records.rename(columns={'distractor':'Actual Text'})

        gen_dist=val.merge(temp_df,on=['Actual Text']).loc[:,['text','Generated Text','Num distractor']]

        distractors=val.groupby(['text']).agg({ 'Actual Text': lambda x: list(x.str.split())}).reset_index()

        dist_compare=distractors.merge(gen_dist,on=['text'])
        dist_compare['Generated Text']=dist_compare['Generated Text'].str.split()
        dist_compare=dist_compare.assign(bleu1=dist_compare.apply(lambda x:sentence_bleu(x['Actual Text'],x['Generated Text'],weights=(1, 0, 0, 0),smoothing_function=SmoothingFunction().method1),axis=1))
        dist_compare=dist_compare.assign(bleu2=dist_compare.apply(lambda x:sentence_bleu(x['Actual Text'],x['Generated Text'],weights=(0, 1, 0, 0),smoothing_function=SmoothingFunction().method1),axis=1))
        dist_compare=dist_compare.assign(bleu3=dist_compare.apply(lambda x:sentence_bleu(x['Actual Text'],x['Generated Text'],weights=(0, 0, 1, 0),smoothing_function=SmoothingFunction().method1),axis=1))
        dist_compare=dist_compare.assign(bleu4=dist_compare.apply(lambda x:sentence_bleu(x['Actual Text'],x['Generated Text'],weights=(0, 0, 0, 1),smoothing_function=SmoothingFunction().method1),axis=1))

        for i in range(1,4):
            bleu_1=dist_compare.loc[dist_compare['Num distractor']==i].bleu1.mean()
            bleu_2=dist_compare.loc[dist_compare['Num distractor']==i].bleu2.mean()
            bleu_3=dist_compare.loc[dist_compare['Num distractor']==i].bleu3.mean()
            bleu_4=dist_compare.loc[dist_compare['Num distractor']==i].bleu4.mean()
            writer.add_scalar('val/bleu/distractor_{}/bleu_1'.format(i), bleu_1, global_step)
            writer.add_scalar('val/bleu/distractor_{}/bleu_2'.format(i), bleu_2, global_step)
            writer.add_scalar('val/bleu/distractor_{}/bleu_3'.format(i), bleu_3, global_step)
            writer.add_scalar('val/bleu/distractor_{}/bleu_4'.format(i), bleu_4, global_step)


        bleu_1=dist_compare.bleu1.mean()
        bleu_2=dist_compare.bleu2.mean()
        bleu_3=dist_compare.bleu3.mean()
        bleu_4=dist_compare.bleu4.mean()
        writer.add_scalar("val/bleu/distractor_gen/bleu_1", bleu_1, global_step)
        writer.add_scalar("val/bleu/distractor_gen/bleu_2", bleu_2, global_step)
        writer.add_scalar("val/bleu/distractor_gen/bleu_3", bleu_3, global_step)
        writer.add_scalar("val/bleu/distractor_gen/bleu_4", bleu_4, global_step)
            
    return predictions, actuals



def main(config):
    model_params={
        "MODEL":"t5-small",             # model_type: t5-base/t5-large
        "TRAIN_BATCH_SIZE":2,          # training batch size
        "VALID_BATCH_SIZE":2,          # validation batch size
        "TRAIN_EPOCHS":2,              # number of training epochs
        "VAL_EPOCHS":1,                # number of validation epochs
        "LEARNING_RATE":1e-4,          # learning rate
        "MAX_SOURCE_TEXT_LENGTH":300,  # max length of source text
        "MAX_TARGET_TEXT_LENGTH":301,   # max length of target text
        "MAX_ANSWER_LENGTH":300,   # max length of answer text
        "SEED": 42                     # set seed for reproducibility 

    }


    source_text='text'
    target_text='distractor'
    answer_text='answer_text'
    model_params=model_params

    with open(os.path.join(C.DATA_DIR, "distractor/race_train_original.json"), 'r') as content_file:
        content = content_file.read()
    content=content.replace('\n',',')
    content='['+content[:-1]+']'
    records = json.loads(content)
    records=pd.DataFrame(records)
    
    # Set random seeds and deterministic pytorch for reproducibility
    torch.manual_seed(model_params["SEED"]) # pytorch random seed
    np.random.seed(model_params["SEED"]) # numpy random seed
    torch.backends.cudnn.deterministic = True


    # tokenzier for encoding the text
    tokenizer = T5Tokenizer.from_pretrained(model_params["MODEL"])

    # Defining the model. We are using t5-base model and added a Language model layer on top for generation of Summary. 
    # Further this model is sent to device (GPU/TPU) for using the hardware.
    model = T5ForConditionalGeneration.from_pretrained(model_params["MODEL"])
    model = model.to(C.DEVICE)

    ## format the input
    records=records.assign(question=records.question.str.join(' '))
    records=records.assign(distractor=records.distractor.str.join(' '))
    records=records.assign(article=records.article.str.join(' '))
    records=records.assign(answer_text=records.answer_text.str.join(' '))
    records=records.loc[:,['article','question','answer_text','distractor']]
    records=records.assign(text="dist q: "+records.question+" a: "+records.answer_text+" p: "+records.article)
    records=records.loc[:,['text','distractor','answer_text']]

    with open(os.path.join(C.DATA_DIR, "distractor/race_dev_original.json"), 'r') as content_file:
        content = content_file.read()
    content=content.replace('\n',',')
    content='['+content[:-1]+']'
    records_test = json.loads(content)
    records_test=pd.DataFrame(records_test)

    ## format the input
    records_test=records_test.assign(question=records_test.question.str.join(' '))
    records_test=records_test.assign(distractor=records_test.distractor.str.join(' '))
    records_test=records_test.assign(article=records_test.article.str.join(' '))
    records_test=records_test.assign(answer_text=records_test.answer_text.str.join(' '))
    records_test=records_test.loc[:,['article','question','answer_text','distractor']]
    records_test=records_test.assign(text="dist q: "+records_test.question+" a: "+records_test.answer_text+" p: "+records_test.article)
    records_test=records_test.loc[:,['text','distractor','answer_text']]

    # Creation of Dataset and Dataloader
    # Defining the train size. So 80% of the data will be used for training and the rest for validation. 
    val_dataset=records_test
    train_dataset = records


    # Creating the Training and Validation dataset for further creation of Dataloader
    training_set = YourDataSetClass(train_dataset, tokenizer, model_params["MAX_SOURCE_TEXT_LENGTH"], model_params["MAX_TARGET_TEXT_LENGTH"],model_params["MAX_ANSWER_LENGTH"], source_text, target_text,answer_text)
    val_set = YourDataSetClass(val_dataset, tokenizer, model_params["MAX_SOURCE_TEXT_LENGTH"], model_params["MAX_TARGET_TEXT_LENGTH"],model_params["MAX_ANSWER_LENGTH"], source_text, target_text,answer_text)



    # Defining the parameters for creation of dataloaders
    train_params = {
      'batch_size': model_params["TRAIN_BATCH_SIZE"],
      'shuffle': True,
      'num_workers': 0
      }


    val_params = {
      'batch_size': model_params["VALID_BATCH_SIZE"],
      'shuffle': False,
      'num_workers': 0
      }


    # Creation of Dataloaders for testing and validation. This will be used down for training and validation stage for the model.
    training_loader = DataLoader(training_set, **train_params)
    val_loader = DataLoader(val_set, **val_params)


    # Defining the optimizer that will be used to tune the weights of the network in the training session. 
    optimizer = torch.optim.Adam(params =  model.parameters(), lr=model_params["LEARNING_RATE"])
    
    # Create Tensorboard logger.
    experiment_id = int(time.time())
    experiment_name = "name"
    model_dir = create_model_dir(os.path.join(C.DATA_DIR, "experiments/"), experiment_id, experiment_name)
        
    global_step = 0
    writer = SummaryWriter(os.path.join(model_dir, 'logs'))
    for epoch in range(model_params["TRAIN_EPOCHS"]):
        global_step=train(epoch, tokenizer, model, C.DEVICE, training_loader, optimizer,writer,global_step,records,model_dir)

    #Saving the model after training
    path = os.path.join(model_dir, "model_files")
    model.save_pretrained(path)
    tokenizer.save_pretrained(path)


    # evaluating test dataset
    for epoch in range(model_params["VAL_EPOCHS"]):
        predictions, actuals = validate(epoch, tokenizer, model, C.DEVICE, val_loader,writer)
        final_df = pd.DataFrame({'Generated Text':predictions,'Actual Text':actuals})
        final_df.to_csv(os.path.join(model_dir, 'predictions.csv'),index=False)




In [None]:
if __name__ == '__main__':
    main(Configuration.parse_cmd())

torch.Size([6, 32])
tensor([[    0,  1028,    17,     3,  1824,    10,   363,   410,     8, 17926,
             7,   103,   116,    79,  1509,     8,  1871,     3,     5,   328,
          2139,    12,  1822,    34,    45,     8,     3,    89,  4664,     3,
             5,     1],
        [    0,     3,  1824,    10,   363,   410,     8, 17926,     7,   103,
           116,    79,  1509,     8,  1871,     3,     5,   328,  2139,    12,
          1822,    34,    45,     8,     3,    89,  4664,     3,     5,     1,
             0,     0],
        [    0,  1028,    17,     3,  1824,    10,   363,   410,     8, 17926,
             7,   103,   116,    79,  1509,     8,  1871,     3,     5,   328,
          2139,  1822,    34,    45,     8,     3,    89,  4664,     3,     5,
             1,     0],
        [    0, 10747,     7,    15,     1,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,   

torch.Size([6, 59])
tensor([[    0,     3,  1927,  4102,    53,    65,   582,     8,   167,  1012,
           928,  2600,    16,  1371,     3,     5,   163,    80,  1843,    13,
          1277,    19,   906,  1636,     3,     9,  3116,    13,   207,  4439,
             3,     5,     1,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0],
        [    0,     3,  1927,  4102,    53,    19,     8,   167,  1012,   928,
          2600,    16,  1371,     3,     5,   163,    80,  1843,    13,  1277,
            19,   906,  1636,     3,     9,  3116,    13,   207,  4439,     3,
             5,     1,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0],
        [    0,     3,  1927,  4102,    53, 

torch.Size([6, 34])
tensor([[    0, 14053,    33,    38,   207,    38,  2340,  1720,     7,    11,
          6112,     7,     3,   102,    10,   151,   497,    24,     8,  6579,
          1155,    33,  3654,     3,     5,     1,     0,     0,     0,     0,
             0,     0,     0,     0],
        [    0, 14053,    33,    38,   207,    38,  2340,  1720,     7,    11,
          6112,     7,     3,   102,    10,   151,   497,    24,     8,  6579,
          1155,    33,  3654,     3,     5,    96,  1326,    54,   103,   406,
         14421,     7,   121,     1],
        [    0,     3,     9,    10, 14053,    33,    38,   207,    38,  2340,
          1720,     7,    11,  6112,     7,     3,   102,    10,   151,   497,
            24,     8,  6579,  1155,    33,  3654,     3,     5,     1,     0,
             0,     0,     0,     0],
        [    0,     3,     9,  1871,     3,  9427,     9,  7398,    19,  2170,
           335,  1922,   306,    11,  1086,     3,  6347,    30,   165,   22

torch.Size([6, 45])
tensor([[    0,  3157,  8888,     7,  7102,  1152,    29,   920,   633,  1274,
           139,     8,   847,    13,   932,     3,     5,  7902,    16,     8,
           616,    43,     3, 23310,    57, 10603,  4526, 12509,    35,  4474,
             3,     6,    11,     8,  3157,  8888,     7,  7178,    95,    81,
             3,     9,   847,  2283,     1],
        [    0,  3157,  8888,     7,  7102,  1152,    29,   920,   633,  1274,
           139,     8,   847,    13,   932,     3,     5,  7902,    16,     8,
           616,    43,     3, 23310,    57, 10603,  4526, 12509,    35,  4474,
             3,     5,     1,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0],
        [    0,  3157,  8888,     7,  7102,  1152,    29,   342,   633,  1274,
           139,     8,   847,    13,   932,     3,     5,  7902,    16,     8,
           616,    43,     3, 23310,    57, 10603,  4526, 12509,    35,  4474,
             3,     5

torch.Size([6, 22])
tensor([[    0,   160,  2353,    47,  3164,   479,    21,   160,     3,   102,
            10,    71,  7569, 15825,     3, 26402,   190,   160,  1268,     3,
             5,     1],
        [    0,   160,  2353,   410,    59,   240,   124,    13,     8,  2004,
           250,     3,    88,    47,  3164,   479,    21,   160,     3,   102,
            10,     1],
        [    0,   160,  1362,   133,    36,  7918,   213,   255,    47,     3,
             5,     1,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0],
        [    0,  3671,   384,  9855,  1045,  7468,     7,    13,  1974,    16,
            70,   182,   293, 10276,     3,     5,     1,     0,     0,     0,
             0,     0],
        [    0,  3671,   384,  9855,  1045,  7468,     7,    13,  1974,    16,
            70,   293, 10276,     3,     5,     1,     0,     0,     0,     0,
             0,     0],
        [    0,     3,     9,    10, 10295, 14799, 12530,   656,  1646

torch.Size([6, 17])
tensor([[   0,    3,    9,   10,    8, 4473,   54,   36,  182, 9748,   12,    8,
         5480,   16,    8,  296,    1],
        [   0,    8, 4473,   54,   36,  182, 9748,   12,    8, 5480,   16,    8,
          296,    1,    0,    0,    0],
        [   0,    8, 4473,   54,   36,  182, 9748,   12,    8, 5480,   16,    8,
          296,    3,    5,    1,    0],
        [   0,   72,  151,  764,   12,   70,    3, 9176,    3,    5,    1,    0,
            0,    0,    0,    0,    0],
        [   0,   72,  151,  764,   12,   70,    3, 9176,    1,    0,    0,    0,
            0,    0,    0,    0,    0],
        [   0,    3,    9,   10,   37,  151,  130, 4403,   12,  199,  684, 1082,
            1,    0,    0,    0,    0]])
['a: the exhibition can be very inspiring to the blind in the world', 'the exhibition can be very inspiring to the blind in the world', 'the exhibition can be very inspiring to the blind in the world .', 'more people came to their stall .', 'more people

torch.Size([6, 24])
tensor([[    0,   250,     8,  2291,     3,    31,     7,   280,    47,  1256,
           237,   713,   255,    47,   787,     3,     9, 10946,     1,     0,
             0,     0,     0,     0],
        [    0,     8,  2291,     3,    31,     7,   280,    47,  1256,   237,
           713,   255,    47,   787,     3,     9, 10946,     1,     0,     0,
             0,     0,     0,     0],
        [    0,     8,  2291,     3,    31,     7,   280,    47,  1256,   237,
           713,   255,    47,   787,     3,     9, 10946,     3,     5,     1,
             0,     0,     0,     0],
        [    0,  8767,  3273, 19214,  4702,    16,  9884,  5718,    19,     3,
         14138,     3,     9,   503,    84,  8286,     7,    12,     3,    60,
         12039,     8,  2109,     1],
        [    0, 19214, 25990,    16,     8,  5727,    13,  2789,    19,     3,
         19874,    12,  2412,    95,   126,     3,  9905,  7208,    12,   199,
          2519,   415, 11402,     1],


['a man stopped his car and said : " I \'m here to help you .', 'man stopped his car and said : " I \'m here to help you .', 'a man stopped his car and said : " I \'m here to help you', 'the link between drawing and later intelligence was influenced by genes', "children were asked to complete a ' Draw - a - Child", "children were asked to complete a ' Draw - a - Child test"]
['He asked her to help his wife .', 'They were tested on how smart they were once .']
He asked her to help his wife .
actualslen 3
['He asked her to help his wife .', 'He asked her to help his wife .', 'He asked her to help his wife .']
They were tested on how smart they were once .
actualslen 6
['He asked her to help his wife .', 'He asked her to help his wife .', 'He asked her to help his wife .', 'They were tested on how smart they were once .', 'They were tested on how smart they were once .', 'They were tested on how smart they were once .']
6
6
[1, 2, 3, 1, 2, 3]
                                      Generate

torch.Size([6, 13])
tensor([[    0,     8,  6364,  9190,    54,  1137,  4038,   982,    12,  1774,
             1,     0,     0],
        [    0,     8,  6364,  9190,    54,  1137,  4038,   982,    12,  1774,
             3,     5,     1],
        [    0,     3,  5855,  2085, 15543,     1,     0,     0,     0,     0,
             0,     0,     0],
        [    0,  2855,   656,   150,  1750,    16,     8,  1903,  1080,    13,
             8,  3863,     1],
        [    0,  2855,   656,   150,  1750,    16,     8,  1903,  1080,    13,
          3863,     1,     0],
        [    0,  2855,   656,   150,  1750,    16,  1903,  1080,    13,     8,
          3863,     1,     0]])
['the empty nest can cause bigger problems to surface', 'the empty nest can cause bigger problems to surface .', 'aging sleeplessness', 'Education makes no difference in the pass rate of the drivers', 'Education makes no difference in the pass rate of drivers', 'Education makes no difference in pass rate of the driver

torch.Size([6, 15])
tensor([[    0,   250,    79,   217,   175,  7404,    38,   705,     3,    18,
         12894,    16,   613,   512,     1],
        [    0,    79,   217,   175,  7404,    38,   705,     3,    18, 12894,
            16,   613,   512,     1,     0],
        [    0,   250,    79,   217,   175,  7404,    38,   705, 12894,    16,
           613,   512,     1,     0,     0],
        [    0,    12,   199,  1921,     8,   502,     3,    31,     7,  1183,
          1098,     1,     0,     0,     0],
        [    0,    12,   199,  1921,   502,     3,    31,     7,  1183,  1098,
             1,     0,     0,     0,     0],
        [    0,   502,     3,    31,     7,   484,     1,     0,     0,     0,
             0,     0,     0,     0,     0]])
['because they see these subjects as less - promising in job market', 'they see these subjects as less - promising in job market', 'because they see these subjects as less promising in job market', "to help challenge the children 's re

torch.Size([6, 10])
tensor([[   0,  363,   19,    8, 2829,   13,   70,  307, 1342,    1],
        [   0,    3,   29,   31,   17,  281,   12,  217, 6659,    1],
        [   0, 8767,  799,    1,    0,    0,    0,    0,    0,    0],
        [   0,    8,  200,  194,   12,  777,   39, 6743,  540,    1],
        [   0,    3,    9,  385, 6743,  540,    1,    0,    0,    0],
        [   0,    3,    9,   10,   62,   54,  777, 3242,    1,    0]])
['What is the secret of their long lives', "n't go to see doctors", 'Fresh air', 'the best way to enjoy your pocket money', 'a little pocket money', 'a: we can enjoy ourselves']
["They do n't sit all day in busy offices .", 'there would be no fun without pocket money']
They do n't sit all day in busy offices .
actualslen 3
["They do n't sit all day in busy offices .", "They do n't sit all day in busy offices .", "They do n't sit all day in busy offices ."]
there would be no fun without pocket money
actualslen 6
["They do n't sit all day in busy offices 

torch.Size([6, 14])
tensor([[    0,  5209,  6868,     7,  3417,  3397,     1,     0,     0,     0,
             0,     0,     0,     0],
        [    0,  5209,  6868,     7,  3417,  3397,  7030,  2386,   932,     3,
            87,  1515,     1,     0],
        [    0,  5209,  6868,     7,  3417,  3397,  7030,  2386,     1,     0,
             0,     0,     0,     0],
        [    0, 11019,  1896, 14046,     3, 12895,   160, 12227,    12,   913,
            21,  1021,   151,     1],
        [    0, 11019,  1896, 14046,    65,     3, 12895,   160, 12227,    12,
           913,    21,   502,     1],
        [    0, 11019,  1896, 14046,     3, 12895,   160, 12227,    12,   913,
             1,     0,     0,     0]])
['Young Cuts Film Festival', 'Young Cuts Film Festival Toronto Washington May / June', 'Young Cuts Film Festival Toronto Washington', 'Mollie Hunter devoted her talents to writing for young people', 'Mollie Hunter has devoted her talents to writing for children', 'Mollie Hunte

torch.Size([6, 12])
tensor([[   0, 1021, 1076,   43,   12,  619,  614,  280,   30,    8, 1373,    1],
        [   0,   96, 1263, 4653,    3,    6, 1021, 1076,    1,    0,    0,    0],
        [   0, 1021, 1076,    3,    5,    1,    0,    0,    0,    0,    0,    0],
        [   0, 3567,   33,   72, 1638,   16, 8973,    1,    0,    0,    0,    0],
        [   0, 3567,  608,   72,   16,  879,    1,    0,    0,    0,    0,    0],
        [   0, 3567,  608,   72,    1,    0,    0,    0,    0,    0,    0,    0]])
['young men have to live hard life on the road', '" Go west , young men', 'young men .', 'girls are more interested in fiction', 'girls read more in general', 'girls read more']
["prison does n't change them", 'boys have no interest in fiction']
prison does n't change them
actualslen 3
["prison does n't change them", "prison does n't change them", "prison does n't change them"]
boys have no interest in fiction
actualslen 6
["prison does n't change them", "prison does n't change them

In [3]:
if __name__ == '__main__':
    main(Configuration.parse_cmd())

tensor(1.1706, grad_fn=<AddBackward0>)
ans tensor([[    8, 12939,    30,   149,    12,  4226,     8,  2503,  3142,   655,
            13,   502,     1,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
         

['the the the  their they are. their   their not not her not not        not not not   her  not  True  not  not   not  not not not  not  not  not not not  not not  not     not   not     not  not  not not   not not not not True  not True  not  not not  not    not not not   not   True not   not  not not the the to  the not not the not  not not   True  not  not not not  not not   not not not  not not not  not    not  not    not not   not    not      not    not  not not not True     not   not  not  not     True  not  not  not not not not not   not not not  not not    not    not    not not not not not not    not    not not not  not  not not  True  not     not  not  not True not  not not   not not  not not  not    not not not not   not not not not not   not not  not  not   not', '. canyon                 "         I  dis    I   I  dis     dis       dis            I I   I I dis  I     dis  I    dis                        dis  dis           dis    dis               I  dis      dis     dis  dis 

['and and                                               the the the the', 'global global   global      global       global               food   global       global            food  global global        global global     global food           they  in the the in the       food                     food      global  global  global food global      global       global        global     food     global global          food  global      global  food  global   global       food       global     food    food       global         food global global     global   food        global     food global       global']
tensor(1.1667, grad_fn=<AddBackward0>)
ans tensor([[ 8667,     5,  1945,    65,   641,   530,     3,     9,   306,   496,
         18320,     3,     5,     1,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,   

['., she " " people the get about about. she she                                 Mrs                          Mrs              Mrs   Mrs  she the the her the the                                    Mrs                                      Mrs                            Mrs       Mrs                      Mrs        Mrs      Mrs               Mrs    Mrs', 'to to, will   the homes                                                                                                  all all all will all the the new homes       two                                                                                             the']
tensor(1.1421, grad_fn=<AddBackward0>)
ans tensor([[   8, 1075,   13, 2833, 1164,   19,  359,    3,    5,    1,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
         

['art the thea art  art . the                                                                                                      the hospital hospital the the the the hospital art hospital', 'a  - d  for women . the     handsome                     attractive                handsome                                     attractive  attractive                                              new         men    handsome  good    handsome                         handsome                                                                                 handsome          attractive    handsome']
tensor(1.1812, grad_fn=<AddBackward0>)
ans tensor([[ 5521,     8,  1021,    12,  3319, 12389,     1,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0

['of thecamp   the  young Sea Sea Sea Sea Sea Sea Sea Sea  sea Sea Sea Sea Sea Sea Sea Sea Sea Sea  Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea  Sea Sea Sea Sea Sea Sea Sea Sea Sea        sea Sea  Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea  Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea Sea sea Sea Sea Sea Sea Sea Sea Sea Sea sea Sea Sea Sea Sea Sea Sea Sea Sea Sea   Sea Sea Sea Sea Sea Sea dis Sea Sea Sea Sea sea Sea Sea Sea Sea  Sea Sea Sea Sea Sea Sea Sea Sea', 'Rubber Duck arrived in own  the . the the  the    Rubber Rubber Rubber Rubber   the Rubber Rubber Rubber     Rubber Rubber Rubber Rubber Rubber Rubber Rubber  Rubber  Rubber  Rubber Rubber Rubber   Rubber Rubber  Rubber Rubber Rubber  Rubber Rubber Rubber  Rubber  Rubber Rubber Rubber Rubber Rubber Rubber Rubber Rubber    Rubber Rubber Rubber Rubber Rubber   Rubber Rubber Rubbe

['To To,,.      Professor  half half        half  Professor half half    half   half half      half  half  Professor  half    scientists   Professor   climate   Professor      half  half       half   Professor   climate          half  half     half   half Climate  Professor      Professor Professor  to Climate hunger  climate  climate  half  Professor         Professor    climate   Professor half half  half   half      Professor  half     half   half    half climate    half   half half  Professor  half            climate  half half       half  half Professor Professor     scientists Professor   half      Professor     half      half     half  Professor   Professor     half  half    half      climate     Professor   half     half   half               half   half', 'is four. -. five-.  ,.  Mary  Mary   Mary not not      Mary Mary Mary Mary Mary Mary  Mary  Mary Mary  Mary      Mary Mary   Mary Mary  Mary Mary Mary  Mary not Mary Mary Mary  Mary Mary Mary      Mary Mary Mary Mary  Mary Ma

['???a able     dis       volunteering    volunteering volunteer volunteer Volunteer      volunteer    volunteer Volunteer   volunteer volunteer  volunteer  volunteer volunteer volunteer volunteer volunteer volunteer   volunteer  volunteer Volunteer  volunteer   volunteer volunteer volunteer volunteer  volunteer volunteering    Volunteer volunteer volunteer volunteer volunteer volunteer   volunteer  volunteer      Volunteer volunteer volunteer  Volunteer  volunteer volunteer   volunteer volunteer volunteer volunteer Volunteer volunteer volunteer  volunteer Volunteer volunteer volunteer volunteer volunteer  volunteer  volunteer?  True  What? Can  not dis dis    dis   dis   dis dis    volunteering dis Volunteer  volunteer dis volunteer   volunteer  volunteer volunteer  volunteer volunteer volunteer volunteer volunteer Volunteer volunteer volunteer dis volunteer volunteer  Volunteer volunteer  Volunteer    Volunteer  volunteer  Volunteer volunteer  volunteer  volunteer volunteer   volunte

['. e -  clothess- pot     I man  man   I        I         I    I  I       I    I  man       I            I   man I    I   I      I I     I     I I     clothes                    I   man I     man    I I     I        I   I  man     man           I             I    I    man   man     I I    I   I     I   I    I          man man    I     I    I I         man   man      I  I  I   I I    I   I     I I', ",he did n's   anything ."]
tensor(1.1841, grad_fn=<AddBackward0>)
ans tensor([[  328,   164,  2615,    70,  2476,     3,     9,    89,   929,   578,
          9388,     1,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,  

['study                             mother                 mother  They                                                                       study', '.        cancer will beaa    to . on                doctors         doctors              doctors     doctors  doctors         doctor                     doctors doctors          doctors              cancer cancer                                                                       doctors               doctors      doctors    doctors   doctor                                    doctors     doctors     doctors                    doctors  doctors']
tensor(1.1995, grad_fn=<AddBackward0>)
ans tensor([[  328,  2139,    12,  1822,    34,    45,     8,     3,    89,  4664,
             3,     5,     1,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,   

['Protect!    theate away .                     elephant                                   elephant                                                                                                  elephant                                         elephant              elephant                                   elephant', '. story   dis  dis dis dis  dis dis   dis dis dis  dis mark dis dis dis dis dis dis dis dis dis  dis dis Mark dis dis dis dis dis dis dis   dis dis dis  dis dis  dis dis dis dis  Mark dis Mark dis  dis dis  dis dis dis dis  dis dis dis dis dis dis dis dis dis dis dis dis dis Mark dis dis dis dis dis Mark dis dis dis dis dis  dis dis dis dis dis dis dis dis dis dis dis dis dis tell tell how     tell   dis dis dis dis dis dis dis dis dis dis dis  dis dis dis Mark  dis   Mark dis dis dis Mark dis dis  dis dis dis  dis dis dis dis Mark dis dis dis dis dis  dis Mark dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis d

['To the not to toeat  .  dis dis dis dis dis dis dis dis dis dis dis dis  dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis  dis dis dis  dis dis dis dis dis dis dis dis  dis dis dis dis dis dis  dis dis dis dis dis dis dis dis dis dis  dis dis dis dis dis dis dis dis dis dis dis dis dis dis   dis dis dis  dis dis dis dis dis dis dis dis dis dis dis dis dis To warn   nots    dis warn dis dis dis dis dis dis dis dis dis dis dis dis dis  dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis  dis dis dis dis dis dis dis dis dis dis dis  dis dis dis dis dis dis dis dis dis dis dis  dis dis dis dis dis dis dis  dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis    dis dis dis dis dis dis dis dis  dis  dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis dis  dis dis dis dis dis dis dis  dis dis dis  dis dis dis  dis dis dis dis dis dis  dis dis dis dis dis dis dis

KeyboardInterrupt: 