In [1]:
import json
import pandas as pd
# Importing libraries
import os
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
import os
from configuration import Configuration
from configuration import CONSTANTS as C
# Importing the T5 modules from huggingface/transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration
from nltk.translate.bleu_score import sentence_bleu
from rich.table import Column, Table
from rich import box
from rich.console import Console
from tensorboardX import SummaryWriter
import time
# Setting up the device for GPU usage
from torch import cuda
from transformers import BertForSequenceClassification, Trainer, TrainingArguments

In [2]:

class YourDataSetClass(Dataset):
    """
    Creating a custom dataset for reading the dataset and 
    loading it into the dataloader to pass it to the neural network for finetuning the model
    """    
    def __init__(self, dataframe, tokenizer, source_len, target_len, source_text, target_text):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.source_len = source_len
        self.summ_len = target_len
        self.target_text = self.data[target_text]
        self.source_text = self.data[source_text]

    def __len__(self):
        return len(self.target_text)

    def __getitem__(self, index):
        source_text = str(self.source_text[index])
        target_text = str(self.target_text[index])
        #cleaning data so as to ensure data is in string type
        source_text = ' '.join(source_text.split())
        target_text = ' '.join(target_text.split())
        source = self.tokenizer.batch_encode_plus([source_text], max_length= self.source_len, pad_to_max_length=True, truncation=True, padding="max_length", return_tensors='pt')
        target = self.tokenizer.batch_encode_plus([target_text], max_length= self.summ_len, pad_to_max_length=True, truncation=True, padding="max_length", return_tensors='pt')
        source_ids = source['input_ids'].squeeze()
        source_mask = source['attention_mask'].squeeze()
        target_ids = target['input_ids'].squeeze()
        target_mask = target['attention_mask'].squeeze()
        return {
            'source_ids': source_ids.to(dtype=torch.long), 
            'source_mask': source_mask.to(dtype=torch.long), 
            'target_ids': target_ids.to(dtype=torch.long),
            'target_ids_y': target_ids.to(dtype=torch.long)
        }

# define a rich console logger

def display_df(df):
    """display dataframe in ASCII format"""
    table = Table(Column("source_text", justify="center" ), Column("target_text", justify="center"), title="Sample Data",pad_edge=False, box=box.ASCII)
    for i, row in enumerate(df.values.tolist()):
        table.add_row(row[0], row[1])


def create_model_dir(experiment_main_dir, experiment_id, model_summary):
    """
    Create a new model directory.
    :param experiment_main_dir: Where all experiments are stored.
    :param experiment_id: The ID of this experiment.
    :param model_summary: A summary string of the model.
    :return: A directory where we can store model logs. Raises an exception if the model directory already exists.
    """
    model_name = "{}-{}".format(experiment_id, model_summary)
    model_dir = os.path.join(experiment_main_dir, model_name)
    if os.path.exists(model_dir):
        raise ValueError("Model directory already exists {}".format(model_dir))
    os.makedirs(model_dir)
    return model_dir

def train(epoch, tokenizer, model, device, loader, optimizer,writer,global_step):

    """
    Function to be called for training with the parameters passed from main function

    """
    model.train()
    for _,data in enumerate(loader, 0):
        y = data['target_ids'].to(device, dtype = torch.long)
        y_ids = y[:, :-1].contiguous()
        lm_labels = y[:, 1:].clone().detach()
        lm_labels[y[:, 1:] == tokenizer.pad_token_id] = -100
        ids = data['source_ids'].to(device, dtype = torch.long)
        mask = data['source_mask'].to(device, dtype = torch.long)
        
        outputs = model.forward(input_ids = ids, attention_mask = mask, decoder_input_ids=y_ids, labels=lm_labels)
        loss = outputs[0]

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        writer.add_scalar("loss", float(loss.detach().numpy()), global_step)
        global_step += 1
    return global_step


def validate(epoch, tokenizer, model, device, loader,writer):

    """
    Function to evaluate model for predictions

    """
    global_step = 0
    model.eval()
    predictions = []
    actuals = []
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            y = data['target_ids'].to(device, dtype = torch.long)
            ids = data['source_ids'].to(device, dtype = torch.long)
            mask = data['source_mask'].to(device, dtype = torch.long)

            generated_ids = model.generate(
              input_ids = ids,
              attention_mask = mask, 
              max_length=150, 
              num_beams=2,
              repetition_penalty=2.5, 
              length_penalty=1.0, 
              early_stopping=True
              )
            preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True) for g in generated_ids]
            target = [tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=True)for t in y]

            predictions.extend(preds)
            actuals.extend(target)
            #writer.add_scalar("loss_validation", float(loss.detach().numpy()), global_step)
            #global_step += 1
    return predictions, actuals



In [3]:
def main(config):
    model_params={
        "MODEL":"t5-small",             # model_type: t5-base/t5-large
        "TRAIN_BATCH_SIZE":8,          # training batch size
        "VALID_BATCH_SIZE":8,          # validation batch size
        "TRAIN_EPOCHS":3,              # number of training epochs
        "VAL_EPOCHS":1,                # number of validation epochs
        "LEARNING_RATE":1e-4,          # learning rate
        "MAX_SOURCE_TEXT_LENGTH":1200,  # max length of source text
        "MAX_TARGET_TEXT_LENGTH":200,   # max length of target text
        "SEED": 42                     # set seed for reproducibility 

    }


    source_text='text'
    target_text='distractor'
    model_params=model_params
    output_dir="outputs"

    with open('/cluster/home/fgonzalez/nlp/data/distractor/race_dev_updated.json', 'r') as content_file:
        content = content_file.read()
    content=content.replace('\n',',')
    content='['+content[:-1]+']'
    records = json.loads(content)
    records=pd.DataFrame(records)
    
    # Set random seeds and deterministic pytorch for reproducibility
    torch.manual_seed(model_params["SEED"]) # pytorch random seed
    np.random.seed(model_params["SEED"]) # numpy random seed
    torch.backends.cudnn.deterministic = True


    # tokenzier for encoding the text
    tokenizer = T5Tokenizer.from_pretrained(model_params["MODEL"])

    # Defining the model. We are using t5-base model and added a Language model layer on top for generation of Summary. 
    # Further this model is sent to device (GPU/TPU) for using the hardware.
    model = T5ForConditionalGeneration.from_pretrained(model_params["MODEL"])
    model = model.to(C.DEVICE)


    records=records.assign(question=records.question.str.join(' '))
    records=records.assign(distractor=records.distractor.str.join(' '))
    records=records.assign(article=records.article.str.join(' '))
    records=records.assign(answer_text=records.answer_text.str.join(' '))

    records=records.loc[:,['article','question','answer_text','distractor']]

    records=records.assign(text="distraction passage: "+records.article+" question: "+records.question+" answer: "+records.answer_text)

    records=records.loc[:,['text','distractor']]

    # Creation of Dataset and Dataloader
    # Defining the train size. So 80% of the data will be used for training and the rest for validation. 
    train_size = 0.8
    train_dataset=records.sample(frac=train_size,random_state = model_params["SEED"])
    val_dataset=records.drop(train_dataset.index).reset_index(drop=True)
    train_dataset = train_dataset.reset_index(drop=True)



    # Creating the Training and Validation dataset for further creation of Dataloader
    training_set = YourDataSetClass(train_dataset, tokenizer, model_params["MAX_SOURCE_TEXT_LENGTH"], model_params["MAX_TARGET_TEXT_LENGTH"], source_text, target_text)
    val_set = YourDataSetClass(val_dataset, tokenizer, model_params["MAX_SOURCE_TEXT_LENGTH"], model_params["MAX_TARGET_TEXT_LENGTH"], source_text, target_text)


    # Defining the parameters for creation of dataloaders
    train_params = {
      'batch_size': model_params["TRAIN_BATCH_SIZE"],
      'shuffle': True,
      'num_workers': 0
      }


    val_params = {
      'batch_size': model_params["VALID_BATCH_SIZE"],
      'shuffle': False,
      'num_workers': 0
      }


    # Creation of Dataloaders for testing and validation. This will be used down for training and validation stage for the model.
    training_loader = DataLoader(training_set, **train_params)
    val_loader = DataLoader(val_set, **val_params)


    # Defining the optimizer that will be used to tune the weights of the network in the training session. 
    optimizer = torch.optim.Adam(params =  model.parameters(), lr=model_params["LEARNING_RATE"])



    loader=training_loader
    experiment_id = int(time.time())
    experiment_name = "name"
    model_dir = create_model_dir("/cluster/home/fgonzalez/nlp/experiments/", experiment_id, experiment_name)
        # Create Tensorboard logger.
    global_step = 0
    writer = SummaryWriter(os.path.join(model_dir, 'logs'))
    for epoch in range(model_params["TRAIN_EPOCHS"]):
        global_step=train(epoch, tokenizer, model, C.DEVICE, training_loader, optimizer,writer,global_step)

    #Saving the model after training
    path = os.path.join(output_dir, "model_files")
    model.save_pretrained(path)
    tokenizer.save_pretrained(path)


    # evaluating test dataset
    for epoch in range(model_params["VAL_EPOCHS"]):
        predictions, actuals = validate(epoch, tokenizer, model, C.DEVICE, val_loader,writer)
        final_df = pd.DataFrame({'Generated Text':predictions,'Actual Text':actuals})
        final_df.to_csv('/cluster/home/fgonzalez/nlp/experiments/predictions.csv')

In [4]:
training_args = TrainingArguments(
    output_dir='./results', 
    evaluation_strategy='steps',          # output directory
    num_train_epochs=3,              # total # of training epochs
    per_device_train_batch_size=16,  # batch size per device during training
    per_device_eval_batch_size=16,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate schedulery
)


In [5]:
training_args

TrainingArguments(output_dir=./results, overwrite_output_dir=False, do_train=False, do_eval=None, do_predict=False, evaluation_strategy=IntervalStrategy.STEPS, prediction_loss_only=False, per_device_train_batch_size=16, per_device_eval_batch_size=16, gradient_accumulation_steps=1, eval_accumulation_steps=None, learning_rate=5e-05, weight_decay=0.0, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=3, max_steps=-1, lr_scheduler_type=SchedulerType.LINEAR, warmup_ratio=0.0, warmup_steps=500, logging_dir=runs/May09_13-44-23_lo-login-02, logging_strategy=IntervalStrategy.STEPS, logging_first_step=False, logging_steps=500, save_strategy=IntervalStrategy.STEPS, save_steps=500, save_total_limit=None, no_cuda=False, seed=42, fp16=False, fp16_opt_level=O1, fp16_backend=auto, fp16_full_eval=False, local_rank=-1, tpu_num_cores=None, tpu_metrics_debug=False, debug=False, dataloader_drop_last=False, eval_steps=500, dataloader_num_workers=0, past_index=-1, run_

In [6]:
model_params={
    "MODEL":"t5-small",             # model_type: t5-base/t5-large
    "TRAIN_BATCH_SIZE":8,          # training batch size
    "VALID_BATCH_SIZE":8,          # validation batch size
    "TRAIN_EPOCHS":3,              # number of training epochs
    "VAL_EPOCHS":1,                # number of validation epochs
    "LEARNING_RATE":1e-4,          # learning rate
    "MAX_SOURCE_TEXT_LENGTH":1200,  # max length of source text
    "MAX_TARGET_TEXT_LENGTH":200,   # max length of target text
    "SEED": 42                     # set seed for reproducibility 

}


source_text='text'
target_text='distractor'
model_params=model_params
output_dir="outputs"

with open('/cluster/home/fgonzalez/nlp/data/distractor/race_dev_updated.json', 'r') as content_file:
    content = content_file.read()
content=content.replace('\n',',')
content='['+content[:-1]+']'
records = json.loads(content)
records=pd.DataFrame(records)

# Set random seeds and deterministic pytorch for reproducibility
torch.manual_seed(model_params["SEED"]) # pytorch random seed
np.random.seed(model_params["SEED"]) # numpy random seed
torch.backends.cudnn.deterministic = True


# tokenzier for encoding the text
tokenizer = T5Tokenizer.from_pretrained(model_params["MODEL"])

# Defining the model. We are using t5-base model and added a Language model layer on top for generation of Summary. 
# Further this model is sent to device (GPU/TPU) for using the hardware.
model = T5ForConditionalGeneration.from_pretrained(model_params["MODEL"])

model = model.to(C.DEVICE)


records=records.assign(question=records.question.str.join(' '))
records=records.assign(distractor=records.distractor.str.join(' '))
records=records.assign(article=records.article.str.join(' '))
records=records.assign(answer_text=records.answer_text.str.join(' '))

records=records.loc[:,['article','question','answer_text','distractor']]

records=records.assign(text="distraction passage: "+records.article+" question: "+records.question+" answer: "+records.answer_text)

records=records.loc[:,['text','distractor']]

# Creation of Dataset and Dataloader
# Defining the train size. So 80% of the data will be used for training and the rest for validation. 
train_size = 0.8
train_dataset=records.sample(frac=train_size,random_state = model_params["SEED"])
val_dataset=records.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)



# Creating the Training and Validation dataset for further creation of Dataloader
training_set = YourDataSetClass(train_dataset, tokenizer, model_params["MAX_SOURCE_TEXT_LENGTH"], model_params["MAX_TARGET_TEXT_LENGTH"], source_text, target_text)
val_set = YourDataSetClass(val_dataset, tokenizer, model_params["MAX_SOURCE_TEXT_LENGTH"], model_params["MAX_TARGET_TEXT_LENGTH"], source_text, target_text)


# Defining the parameters for creation of dataloaders
train_params = {
  'batch_size': model_params["TRAIN_BATCH_SIZE"],
  'shuffle': True,
  'num_workers': 0
  }


val_params = {
  'batch_size': model_params["VALID_BATCH_SIZE"],
  'shuffle': False,
  'num_workers': 0
  }


# Creation of Dataloaders for testing and validation. This will be used down for training and validation stage for the model.
training_loader = DataLoader(training_set, **train_params)
val_loader = DataLoader(val_set, **val_params)


# Defining the optimizer that will be used to tune the weights of the network in the training session. 
optimizer = torch.optim.Adam(params =  model.parameters(), lr=model_params["LEARNING_RATE"])



loader=training_loader


In [7]:
experiment_id = int(time.time())
experiment_name = "name"
model_dir = create_model_dir("/cluster/home/fgonzalez/nlp/experiments/", experiment_id, experiment_name)
    # Create Tensorboard logger.
global_step = 0
writer = SummaryWriter(os.path.join(model_dir, 'logs'))
#for epoch in range(model_params["TRAIN_EPOCHS"]):
#    global_step=train(epoch, tokenizer, model, C.DEVICE, training_loader, optimizer,writer,global_step)



In [8]:
model

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dro

In [9]:
device=C.DEVICE

In [10]:
model.train()

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dro

trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=test_dataset            # evaluation dataset
)

In [None]:

for _,data in enumerate(loader, 0):
    y = data['target_ids'].to(device, dtype = torch.long)
    y_ids = y[:, :-1].contiguous()
    lm_labels = y[:, 1:].clone().detach()
    lm_labels[y[:, 1:] == tokenizer.pad_token_id] = -100
    ids = data['source_ids'].to(device, dtype = torch.long)
    mask = data['source_mask'].to(device, dtype = torch.long)

    outputs = model.forward(input_ids = ids, attention_mask = mask, decoder_input_ids=y_ids, labels=lm_labels)
    loss = outputs[0]
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    writer.add_scalar("loss", float(loss.detach().numpy()), global_step)
    
    ### measure bleu
    model.eval()
    predictions = []
    actuals = []

    generated_ids = model.generate(
      input_ids = ids,
      attention_mask = mask, 
      max_length=150, 
      num_beams=2,
      repetition_penalty=2.5, 
      length_penalty=1.0, 
      early_stopping=True
      )
    preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True) for g in generated_ids]
    target = [tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=True)for t in y]

    predictions.extend(preds)
    actuals.extend(target)

    temp_df = pd.DataFrame({'Generated Text':predictions,'Actual Text':actuals})

    val=records.rename(columns={'distractor':'Actual Text'})

    gen_dist=val.merge(temp_df,on=['Actual Text']).loc[:,['text','Generated Text']]

    distractors=val.groupby(['text']).agg({ 'Actual Text': lambda x: list(x)}).reset_index()

    dist_compare=distractors.merge(gen_dist,on=['text'])

    dist_compare=dist_compare.assign(bleu=dist_compare.apply(lambda x:sentence_bleu(x['Actual Text'],x['Generated Text'],weights=(0, 0, 1, 0)),axis=1))
    bleu_3=dist_compare.bleu.mean()
    model.train()
    writer.add_scalar("bleu3", bleu_3, global_step)
    
    
    global_step += 1


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [12]:
def bleu_training():
    model.eval()
    predictions = []
    actuals = []

    generated_ids = model.generate(
      input_ids = ids,
      attention_mask = mask, 
      max_length=150, 
      num_beams=2,
      repetition_penalty=2.5, 
      length_penalty=1.0, 
      early_stopping=True
      )
    preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True) for g in generated_ids]
    target = [tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=True)for t in y]

    predictions.extend(preds)
    actuals.extend(target)

    temp_df = pd.DataFrame({'Generated Text':predictions,'Actual Text':actuals})

    val=records.rename(columns={'distractor':'Actual Text'})

    gen_dist=val.merge(temp_df,on=['Actual Text']).loc[:,['text','Generated Text']]

    distractors=val.groupby(['text']).agg({ 'Actual Text': lambda x: list(x)}).reset_index()

    dist_compare=distractors.merge(gen_dist,on=['text'])

    dist_compare=dist_compare.assign(bleu=dist_compare.apply(lambda x:sentence_bleu(x['Actual Text'],x['Generated Text'],weights=(0, 0, 1, 0)),axis=1))
    bleu_3=dist_compare.bleu.mean()
    model.train()
    writer.add_scalar("bleu3", bleu_3, global_step)

In [21]:
dist_compare

Unnamed: 0,text,Actual Text,Generated Text,bleu
0,distraction passage: I was 230 pounds this spr...,[the author gained weight on the food from Nuk...,True,5.813196e-312
1,"distraction passage: In some countries , open ...",[prefer eating on campus],True,1.925066e-310
2,distraction passage: It 's Jim 's birthday tod...,"[to put the drum away, to look after the drum,...",Jim's birthday,1.4495e-308
3,distraction passage: One potential problem wit...,[the author does n't want to give any tips on ...,True,6.127063e-313


In [22]:
dist_compare.bleu.mean()

3.67348317092741e-309

In [17]:
temp_df

Unnamed: 0,Generated Text,Actual Text
0,True,It makes the cities more beautiful.
1,True,He had little free time at weekends.
2,Jim's birthday,to make a noise with the drum
3,True,prefer eating on campus
4,True,"Eggplant has a history of nearly 1,000 years."
5,True,the allowance amount grows depending on the am...
6,True,Zone Manhattan sends food once a week
7,True,Friedman is a leading expert on computer science.


In [None]:
#Saving the model after training
path = os.path.join(output_dir, "model_files")
model.save_pretrained(path)
tokenizer.save_pretrained(path)


# evaluating test dataset
for epoch in range(model_params["VAL_EPOCHS"]):
    predictions, actuals = validate(epoch, tokenizer, model, C.DEVICE, val_loader,writer)
    final_df = pd.DataFrame({'Generated Text':predictions,'Actual Text':actuals})
    final_df.to_csv(os.path.join(model_dir, 'predictions.csv'),index=False)

In [23]:
final_df=pd.read_csv('/cluster/home/fgonzalez/nlp/experiments/predictions.csv')

val=records.rename(columns={'distractor':'Actual Text'})

gen_dist=val.merge(final_df,on=['Actual Text']).loc[:,['text','Generated Text']]

distractors=val.groupby(['text']).agg({ 'Actual Text': lambda x: list(x)}).reset_index()

dist_compare=distractors.merge(gen_dist,on=['text'])

In [24]:
dist_compare=dist_compare.assign(bleu=dist_compare.apply(lambda x:sentence_bleu(x['Actual Text'],x['Generated Text'],weights=(0, 0, 1, 0)),axis=1))

The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [25]:
dist_compare.bleu.mean()

0.2904623040609807

In [14]:
dist_compare.bleu.mean()

0.2904623040609807

In [155]:
dist_compare[dist_compare['text']=="distraction passage: On Easter Day , 1722 , Dutch explorers    landed on Easter Island   . It was the first time that Easter Islanders had met people from the outside world . The strangers were about to discover something very strange themselves --that they were on an island with hundreds of huge stone statues   . The Dutch explorers wondered where the Islanders had come from and why and how they had built the statues . Now science is putting together the story .      The first people to arrive on the island came there around A.D. 700 . The society that developed there was based on fishing and farming to feed the population , which grew to 12,000 . Its success showed itself in a way that has become the island 's trademark   : hundreds of huge stone figures --the moai .      None of the moai was standing when scientists first arrived . People put them back up later ; but how had a Stone Age society ever made , moved and set them up there in the first place ? And why ?      There are nearly 900 moai on Easter Island , and while the questions about them remain unanswered , no one doubts the years of effort that must have gone into making them .      The real killer of the Easter Islanders came from across the ocean . After 1722 , it became popular for explorers to visit Easter Island , bringing diseases . The final blow    came in 1862 , when slave traders came from Peru and took away 1,500 people , one - third of the population . question: The passage implies that answer: in 1862 , before slave traders came , about 4,500 people were living on Easter Island"]

Unnamed: 0,text,Actual Text,Generated Text,bleu
626,"distraction passage: On Easter Day , 1722 , Du...",[the Islanders built the moai to show off thei...,the first people to arrive on Easter Island ca...,0.47749
627,"distraction passage: On Easter Day , 1722 , Du...",[the Islanders built the moai to show off thei...,the first people to arrive on Easter Island ca...,0.47749
628,"distraction passage: On Easter Day , 1722 , Du...",[the Islanders built the moai to show off thei...,the first people to arrive on Easter Island ca...,0.47749


In [156]:
dist_compare.loc[626,'Actual Text']

['the Islanders built the moai to show off their success',
 'the Dutch explorers discovered how the moai had been built',
 'the natives of Easter Island have been there for about 1,200 years']

In [None]:
if __name__ == '__main__':
    main(Configuration.parse_cmd())