<a href="https://colab.research.google.com/github/finardi/IA376A/blob/master/T5%20-%20beam%20search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<style type="text/css">
@media print { body { -webkit-print-color-adjust: exact; } }
</style>



# <span style="color:orange"> Paulo Finardi </span>
<span style="color:purple"> - Semana 10 </span>

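Implementação de decodificadores por amostragem (top-k e top-p / nucleus sampling) para tradução inglês → português com o T5, comparados com o `generate` do Hugging Face.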

In [None]:
! nvidia-smi

Thu May 14 01:11:31 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   58C    P8     8W /  75W |      0MiB /  7611MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [None]:
! pip install -q sacrebleu
! pip install -q transformers
! pip install --q unidecode
! pip install -q pytorch-lightning

In [None]:
# Basics
import os
import gzip
import random
import numpy as np
from google.colab import drive
import unidecode

# PyTorch
import torch 
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# Dataset e PyTorch Lightning
import sacrebleu
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint

# Transformers
from transformers import T5ForConditionalGeneration, T5Tokenizer

#Typing
from typing import Dict
from typing import List
from typing import Tuple

INFO:transformers.file_utils:PyTorch version 1.5.0+cu101 available.
INFO:transformers.file_utils:TensorFlow version 2.2.0 available.


In [None]:
manual_seed = 0
def deterministic(rep=True):
    """Seed every RNG this notebook draws from and pin the cuDNN flags.

    Args:
        rep: When True (default), seed Python's `random`, NumPy and PyTorch
            (CPU and, when CUDA is available, the CUDA generators) with the
            module-level `manual_seed`, then set the cuDNN flags below.
            When False, nothing is seeded and only a notice is printed.
    """
    if rep:
        # The stdlib generator must be seeded as well: `random.shuffle` is
        # what orders the training pairs before the train/val split.
        random.seed(manual_seed)
        np.random.seed(manual_seed)
        torch.manual_seed(manual_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(manual_seed)
            torch.cuda.manual_seed_all(manual_seed)
        torch.backends.cudnn.enabled = False
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        print(f'Deterministic experiment, seed: {manual_seed}')
    else:
        print('Random experiment')
deterministic()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Deterministic experiment, seed: 0


In [None]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Dataset

In [None]:
! wget -nc https://storage.googleapis.com/neuralresearcher_data/unicamp/ia376e_2020s1/paracrawl_enpt_train.tsv.gz
! wget -nc https://storage.googleapis.com/neuralresearcher_data/unicamp/ia376e_2020s1/paracrawl_enpt_test.tsv.gz

File ‘paracrawl_enpt_train.tsv.gz’ already there; not retrieving.

File ‘paracrawl_enpt_test.tsv.gz’ already there; not retrieving.



In [None]:
def load_text_pairs(path):
    """Read a gzip-compressed TSV file into a list of tab-split rows.

    Args:
        path: Path of the `.tsv.gz` file; it is decoded as UTF-8 text.

    Returns:
        A list with one entry per line, each entry being the list of
        fields obtained by stripping the line and splitting it on tabs.
    """
    # The context manager closes the gzip handle even if reading fails;
    # the explicit encoding keeps decoding independent of the host locale.
    with gzip.open(path, mode='rt', encoding='utf-8') as tsv_file:
        return [line.strip().split('\t') for line in tsv_file]

x_train_ = load_text_pairs('paracrawl_enpt_train.tsv.gz')
x_test  = load_text_pairs('paracrawl_enpt_test.tsv.gz')

# Shuffle the training pairs before the train/val split. A dedicated
# generator seeded with `manual_seed` makes the resulting order (and thus the
# split) identical on every run, independent of the global `random` state.
random.Random(manual_seed).shuffle(x_train_)

In [None]:
# Training set   = first 20k shuffled pairs
# Validation set = the 2k pairs that follow
split = 20_000
x_train, x_val = x_train_[:split], x_train_[split:split + 2_000]
(len(x_train), len(x_val), len(x_test))

(20000, 2000, 20000)

In [None]:
# Name of the pretrained T5 checkpoint; it is read by the tokenizer here and
# by every `T5ForConditionalGeneration.from_pretrained(size_model)` call below.
size_model = 't5-small'
tokenizer = T5Tokenizer.from_pretrained(size_model)

INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f


In [None]:
source_max_length, target_max_length = 256, 256

class MyDS(Dataset):
    """Translation pairs tokenized on the fly for T5.

    Each item is built from one `(source, target)` text pair: both sides are
    transliterated with `unidecode`, suffixed with the tokenizer's EOS token
    and padded to a fixed length; the source additionally receives the task
    prefix produced by `add_prefix`.

    Args:
        text_pairs: Indexable collection of `(source, target)` text pairs.
        tokenizer: Tokenizer exposing `encode_plus` and `eos_token`.
        source_max_length: `max_length` given to the tokenizer for sources.
        target_max_length: `max_length` given to the tokenizer for targets.
    """

    def __init__(self, text_pairs, tokenizer, source_max_length, target_max_length):
        self.tokenizer = tokenizer
        self.text_pairs = text_pairs
        self.source_max_length = source_max_length
        self.target_max_length = target_max_length

    def __len__(self):
        """Return the number of text pairs."""
        return len(self.text_pairs)

    def add_prefix(self, text, source_language='English', target_language='Portuguese'):
        """Prepend the `translate <source> to <target>: ` task prefix to `text`."""
        prefix_text = f'translate {source_language} to {target_language}: '
        return prefix_text + text

    def _encode(self, text, max_length):
        """Tokenize `text` and return `(input_ids, attention_mask)` as 1-D tensors."""
        encoded = self.tokenizer.encode_plus(text,
                                             max_length=max_length,
                                             pad_to_max_length=True,
                                             return_tensors='pt')
        # Drop only the leading batch axis. A dimensionless `squeeze()` would
        # also collapse the sequence axis whenever `max_length` is 1.
        return encoded['input_ids'].squeeze(0), encoded['attention_mask'].squeeze(0)

    def __getitem__(self, idx):
        """Return one example.

        Returns:
            `(original_source, source_token_ids, source_mask,
              original_target, target_token_ids, target_mask)` where the
            originals are the untouched strings from `text_pairs`.
        """
        source, target = self.text_pairs[idx]
        eos = self.tokenizer.eos_token

        source_token_ids, source_mask = self._encode(
            self.add_prefix(f'{unidecode.unidecode(source)} {eos}'),
            self.source_max_length)
        target_token_ids, target_mask = self._encode(
            f'{unidecode.unidecode(target)} {eos}',
            self.target_max_length)

        return (source, source_token_ids, source_mask,
                target, target_token_ids, target_mask)

## Testando o Dataset

In [None]:
# One-pair dataset used to eyeball the token ids, masks and their shapes.
text_pairs = [('we like pizza', 'eu gosto de pizza')]
dataset_debug = MyDS(text_pairs, tokenizer, 10,10)
dataloader_debug = DataLoader(dataset_debug, batch_size=10, shuffle=True, num_workers=0)

x, x_ids, x_mask, y, y_ids, y_mask = next(iter(dataloader_debug))

debug_tensors = (
    ('source_token_ids', x_ids),
    ('source_mask', x_mask),
    ('target_token_ids', y_ids),
    ('target_mask', y_mask),
)

# Values first, then shapes, in the same order.
for label, value in debug_tensors:
    print(f'{label}:\n', value)

for label, value in debug_tensors:
    print(f'{label}.shape:', value.shape)

source_token_ids:
 tensor([[13959,  1566,    12, 21076,    10,    62,   114,  6871,     1,     0]])
source_mask:
 tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0]])
target_token_ids:
 tensor([[   3,   15,   76,  281,    7,  235,   20, 6871,    1,    0]])
target_mask:
 tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0]])
source_token_ids.shape: torch.Size([1, 10])
source_mask.shape: torch.Size([1, 10])
target_token_ids.shape: torch.Size([1, 10])
target_mask.shape: torch.Size([1, 10])


### Datasets e Dataloaders

In [None]:
x_max_length = 256
y_max_length = 256
batch_size   = 4

# One dataset per split; only the first 1000 test pairs are used.
ds_train, ds_val, ds_test = (
    MyDS(pairs, tokenizer, x_max_length, y_max_length)
    for pairs in (x_train, x_val, x_test[:1000])
)

# Same loader settings for every split, except that only the training
# loader pins memory.
dataloaders = {
    split_name: DataLoader(split_ds,
                           batch_size=batch_size,
                           num_workers=4,
                           pin_memory=(split_name == 'train'))
    for split_name, split_ds in (('train', ds_train), ('val', ds_val), ('test', ds_test))
}

# sanity check: number of batches per split
dl_sizes = {split_name: len(loader) for split_name, loader in dataloaders.items()}
dl_sizes

{'test': 250, 'train': 5000, 'val': 500}

# Implementação Top_p

In [None]:
class Top_p(torch.nn.Module):
    """T5 wrapper that decodes with nucleus (top-p) sampling.

    Args:
        p: Cumulative-probability threshold that delimits the nucleus.
        min_samples: Number of highest-probability tokens that always stay
            candidates, whatever their cumulative probability.
    """

    def __init__(self, p=0.8, min_samples=1):
        super(Top_p, self).__init__()
        self.model = T5ForConditionalGeneration.from_pretrained(size_model)
        self.p = p
        self.min_samples = min_samples

    def generate(self, *args, **kwargs):
        """Forward the call unchanged to the wrapped model's `generate`."""
        return self.model.generate(*args, **kwargs)

    def forward(self, *args, **kwargs):
        """Forward the call unchanged to the wrapped model."""
        return self.model(*args, **kwargs)

    # Implementation inspired by Israel's code
    @torch.no_grad()
    def topp_decode(self, input_ids, attention_mask=None, max_seq_len=32, **decode_kwargs):
        """Autoregressively sample a sequence per batch row with top-p filtering.

        At each step the next-token distribution is sorted in descending
        order and tokens are kept until their cumulative probability reaches
        `self.p`; the token that crosses the threshold is kept, so the
        retained mass is at least `p`. One token is then sampled per row from
        the surviving candidates. Rows that already produced EOS keep
        receiving EOS, and decoding stops once every row emits EOS.

        Args:
            input_ids: Encoder token ids; the first axis is the batch.
            attention_mask: Mask forwarded to the encoder and to the model.
            max_seq_len: Maximum number of tokens to generate.
            **decode_kwargs: Accepted for call compatibility and ignored.

        Returns:
            Tensor of token ids whose first column is the decoder start
            token, followed by the generated tokens.
        """
        self.eval()
        batch_size = input_ids.shape[0]
        device = input_ids.device
        eos_token_id = self.model.config.eos_token_id

        encoder_outs   = self.model.encoder(input_ids, attention_mask)
        decoder_inputs = self.model.config.decoder_start_token_id *\
                         torch.ones(batch_size, 1, dtype=torch.long, device=device)

        for _ in range(max_seq_len):
            # Index instead of tuple-unpacking so the code does not depend on
            # how many extra outputs the model returns after the logits.
            logits = self(encoder_outputs=encoder_outs,
                          decoder_input_ids=decoder_inputs,
                          attention_mask=attention_mask)[0]

            # Only the last position is needed for the next token.
            probs = torch.softmax(logits[:, -1, :], dim=-1)
            sorted_probs, indices = torch.sort(probs, dim=-1, descending=True)

            # Probability mass that precedes each token in the ranking: a
            # token is dropped only if the nucleus was already full before it.
            mass_before = torch.cumsum(sorted_probs, dim=-1) - sorted_probs
            to_zero = mass_before > self.p
            to_zero[:, :self.min_samples] = False
            sorted_probs = sorted_probs.masked_fill(to_zero, 0.0)

            # `selected_ranks` holds one rank per row; `gather` maps each
            # row's rank back to that same row's vocabulary id.
            selected_ranks = torch.multinomial(sorted_probs, num_samples=1)
            next_token_ids = indices.gather(1, selected_ranks)

            # Once a row has emitted EOS it keeps emitting EOS.
            last_was_eos = decoder_inputs[:, -1] == eos_token_id
            next_token_ids = next_token_ids.masked_fill(last_was_eos.unsqueeze(1), eos_token_id)

            decoder_inputs = torch.cat((decoder_inputs, next_token_ids), dim=-1)
            if bool((next_token_ids == eos_token_id).all()):
                break
        return decoder_inputs

# Implementação Top_k

In [None]:
class Top_k(torch.nn.Module):
    """T5 wrapper that decodes with top-k sampling.

    Args:
        k: Number of highest-probability tokens kept as candidates at each
            decoding step.
    """

    def __init__(self, k=60):
        super(Top_k, self).__init__()
        self.model = T5ForConditionalGeneration.from_pretrained(size_model)
        self.k = k

    def generate(self, *args, **kwargs):
        """Forward the call unchanged to the wrapped model's `generate`."""
        return self.model.generate(*args, **kwargs)

    def forward(self, *args, **kwargs):
        """Forward the call unchanged to the wrapped model."""
        return self.model(*args, **kwargs)

    # Implementation inspired by Israel's code
    @torch.no_grad()
    def topk_decode(self, input_ids, attention_mask=None, max_seq_len=32, **decode_kwargs):
        """Autoregressively sample a sequence per batch row from the top-k tokens.

        At each step the `k` most probable next tokens are kept and one of
        them is sampled per row, proportionally to its probability. Rows
        that already produced EOS keep receiving EOS, and decoding stops
        once every row emits EOS.

        Args:
            input_ids: Encoder token ids; the first axis is the batch.
            attention_mask: Mask forwarded to the encoder and to the model.
            max_seq_len: Maximum number of tokens to generate.
            **decode_kwargs: Accepted for call compatibility and ignored.

        Returns:
            Tensor of token ids whose first column is the decoder start
            token, followed by the generated tokens.
        """
        self.eval()
        batch_size = input_ids.shape[0]
        device = input_ids.device
        eos_token_id = self.model.config.eos_token_id

        encoder_outs   = self.model.encoder(input_ids, attention_mask)
        decoder_inputs = self.model.config.decoder_start_token_id *\
                         torch.ones(batch_size, 1, dtype=torch.long, device=device)

        for _ in range(max_seq_len):
            # Index instead of tuple-unpacking so the code does not depend on
            # how many extra outputs the model returns after the logits.
            logits = self(encoder_outputs=encoder_outs,
                          decoder_input_ids=decoder_inputs,
                          attention_mask=attention_mask)[0]

            # Only the last position is needed; `k` is clamped so a value
            # larger than the vocabulary does not make `topk` fail.
            last_logits = logits[:, -1, :]
            k = min(self.k, last_logits.size(-1))
            topk_probs, topk_indices = torch.softmax(last_logits, dim=-1).topk(k, dim=-1, sorted=True)

            # `selected_ranks` holds one rank per row; `gather` maps each
            # row's rank back to that same row's vocabulary id.
            selected_ranks = torch.multinomial(topk_probs, num_samples=1)
            next_token_ids = topk_indices.gather(1, selected_ranks)

            # Once a row has emitted EOS it keeps emitting EOS.
            last_was_eos = decoder_inputs[:, -1] == eos_token_id
            next_token_ids = next_token_ids.masked_fill(last_was_eos.unsqueeze(1), eos_token_id)

            decoder_inputs = torch.cat((decoder_inputs, next_token_ids), dim=-1)
            if bool((next_token_ids == eos_token_id).all()):
                break
        return decoder_inputs

# Testando Top_k


In [None]:
# Build the top-k decoder and load its weights from the checkpoint on Drive.
# NOTE(review): the next cell repeats these exact statements before decoding,
# so this cell looks redundant — confirm before removing.
model = Top_k(k=80).to(device)

ckpt = torch.load('/content/drive/My Drive/Colab Notebooks/Semana10/epoch=3.ckpt', map_location=device)
ckpt.keys()  # not the cell's last expression, so nothing is displayed
model.load_state_dict(ckpt['state_dict'])

In [None]:
# Build the top-k decoder and load its weights from the checkpoint on Drive.
model = Top_k(k=80).to(device)

ckpt = torch.load('/content/drive/My Drive/Colab Notebooks/Semana10/epoch=3.ckpt', map_location=device)
ckpt.keys()  # not the cell's last expression, so nothing is displayed
model.load_state_dict(ckpt['state_dict'])

# First training batch; `x`, `mask` and `max_seq_len` are reused by the
# decoding cells further down.
_, x, mask, y, y_token_ids, y_mask = next(iter(dataloaders['train']))
max_seq_len = 128

# Sample one translation per batch row and print the decoded text.
target_token_ids = model.topk_decode(x.to(device), mask.to(device), max_seq_len=max_seq_len)
for seq in target_token_ids:
    print(tokenizer.decode(seq), '\n')

Serviços de jardinagem em Cauca (Santa Cruz, Bol ⁇ via) 

Com vistas espetacular-se do mar e da praia em todos os quartos, desfrute de uma maravilhosa pôr do sol em frente ao mar do seu grande terraço com mais de 45 m2. 

Farmacêutica em Modegua, Peru 

Cl ⁇ nicas e hospitalose em Texas 



# Testando Nucleus - top_p

In [None]:
# Drop the previous decoder, then build the top-p decoder and load the same
# checkpoint into it.
del model
model = Top_p(p=0.95, min_samples=1).to(device)
ckpt = torch.load('/content/drive/My Drive/Colab Notebooks/Semana10/epoch=3.ckpt', map_location=device)
ckpt.keys()  # not the cell's last expression, so nothing is displayed
model.load_state_dict(ckpt['state_dict'])

In [None]:
# Decode the batch (`x`, `mask`) fetched in the top-k test cell, now with
# nucleus sampling, and print the decoded text of each row.
target_token_ids = model.topp_decode(x.to(device), mask.to(device), max_seq_len=max_seq_len)
for seq in target_token_ids:
    print(tokenizer.decode(seq), '\n')

Serviços de jardinagem em Pedro Rubio (Santa Cruz, Bol ⁇ via) 

Com vistas espetacularionais do mar e da praia em todos os quartos, desfrute de uma maravilhosa pôr do sol em frente ao mar do seu grande terraço com mais de 45 m2. 

Farmacêutica em Moyuria, Peru 

Cl ⁇ nicas e hospitaconismo em Texas 



# Comparando com o Hugging Face

In [None]:
class T5Finetuner(pl.LightningModule):
    """Lightning module that fine-tunes T5 and scores translations with BLEU.

    Batches are expected in the order produced by `MyDS.__getitem__`:
    `(original_source, source_token_ids, source_mask,
      original_target, target_token_ids, target_mask)`.

    Args:
        tokenizer: Tokenizer used to turn generated ids back into text.
        dataloader: Mapping with the `'train'`, `'val'` and `'test'` loaders.
    """

    def __init__(self, tokenizer, dataloader):
        super(T5Finetuner, self).__init__()

        self.model      = T5ForConditionalGeneration.from_pretrained(size_model)
        self.dataloader = dataloader
        self.tokenizer  = tokenizer

    @staticmethod
    def _gpu_usage():
        """Return the CUDA memory currently allocated by PyTorch, in MiB (0 without CUDA)."""
        if not torch.cuda.is_available():
            return 0
        return torch.cuda.memory_allocated() // (1024 ** 2)

    def forward(self, x_token_ids, x_mask, y_token_ids=None, y_mask=None):
        """Return the LM loss in training mode, generated token ids otherwise.

        Args:
            x_token_ids: Source token ids.
            x_mask: Attention mask of the source.
            y_token_ids: Target token ids (used only in training mode).
            y_mask: Attention mask of the target; when given, padded target
                positions are excluded from the loss.
        """
        if self.training:
            lm_labels = y_token_ids
            if y_mask is not None:
                # -100 is the label value the loss ignores, so padding does
                # not contribute to the training objective.
                lm_labels = y_token_ids.masked_fill(y_mask == 0, -100)
            outputs = self.model(input_ids=x_token_ids, attention_mask=x_mask,
                                 lm_labels=lm_labels)
            return outputs[0]

        return self.model.generate(input_ids=x_token_ids, attention_mask=x_mask,
                                   max_length=128)

    def configure_optimizers(self):
        """Adam over the trainable parameters."""
        return torch.optim.Adam([p for p in self.parameters() if p.requires_grad], lr=5e-3)

    def decode_token_ids(self, x_token_ids):
        """Decode one sequence of token ids to text, dropping special tokens."""
        translation = self.tokenizer.decode(x_token_ids,
                                            skip_special_tokens=True,
                                            clean_up_tokenization_spaces=False)
        return translation

    def _bleu_for_batch(self, batch):
        """Generate translations for `batch` and return their corpus BLEU score."""
        # Same field order as `MyDS.__getitem__`: raw texts come first in
        # each (source, target) triple.
        _, x_token_ids, x_mask, y, _, _ = batch
        preds_token_ids = self(x_token_ids, x_mask)
        preds = [self.decode_token_ids(token_ids) for token_ids in preds_token_ids]
        return sacrebleu.corpus_bleu(preds, [y]).score

    def training_step(self, batch, batch_nb):
        """Compute the training loss for one batch."""
        _, x_token_ids, x_mask, _, y_token_ids, y_mask = batch
        loss = self(x_token_ids, x_mask, y_token_ids, y_mask)

        tensorboard_logs = {'train_loss': loss}
        progress_bar     = {'gpu_usage': self._gpu_usage()}
        return {'loss': loss, 'log': tensorboard_logs, 'progress_bar': progress_bar}

    def validation_step(self, batch, batch_nb):
        """Compute BLEU for one validation batch."""
        bleu_score       = self._bleu_for_batch(batch)
        tensorboard_logs = {'val_bleu': bleu_score}
        progress_bar     = {'gpu_usage': self._gpu_usage()}
        return {'val_bleu': bleu_score, 'progress_bar': progress_bar, 'log': tensorboard_logs}

    def test_step(self, batch, batch_nb):
        """Compute BLEU for one test batch."""
        bleu_score   = self._bleu_for_batch(batch)
        progress_bar = {'gpu_usage': self._gpu_usage()}
        return {'test_bleu': bleu_score, 'progress_bar': progress_bar}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation BLEU scores."""
        bleu_score       = sum([x['val_bleu'] for x in outputs]) / len(outputs)
        tensorboard_logs = {'avg_val_bleu': bleu_score}
        return {'avg_val_bleu': bleu_score, 'progress_bar': tensorboard_logs, 'log': tensorboard_logs}

    def training_epoch_end(self, outputs):
        """Average the per-batch training losses."""
        avg_loss         = torch.stack([x['loss'] for x in outputs]).mean()
        tensorboard_logs = {'train_loss': avg_loss}
        return {'log': tensorboard_logs}

    def test_epoch_end(self, outputs):
        """Average the per-batch test BLEU scores."""
        bleu_av          = sum([x['test_bleu'] for x in outputs]) / len(outputs)
        tensorboard_logs = {'avg_test_bleu': bleu_av}
        return {'avg_test_bleu': bleu_av, 'progress_bar': tensorboard_logs}

    def train_dataloader(self):
        return self.dataloader['train']

    def val_dataloader(self):
        return self.dataloader['val']

    def test_dataloader(self):
        return self.dataloader['test']

# Instantiate the module once and discard it right away; the instance used
# for testing is created again in the trainer cell.
model = T5Finetuner(tokenizer, dataloaders)
del model

In [None]:
# Single-GPU trainer pointed at the checkpoint stored on Drive.
# NOTE(review): no cell calls `trainer.fit`; whether `resume_from_checkpoint`
# also restores weights for `trainer.test` depends on the Lightning version —
# confirm.
trainer = pl.Trainer(gpus=1,
                     max_epochs=2,
                     progress_bar_refresh_rate=60,
                     accumulate_grad_batches=8,
                     resume_from_checkpoint='/content/drive/My Drive/Colab Notebooks/Semana10/epoch=3.ckpt')

model = T5Finetuner(tokenizer, dataloaders)

INFO:lightning:GPU available: True, used: True
INFO:lightning:CUDA_VISIBLE_DEVICES: [0]
INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-small-config.json from cache at /root/.cache/torch/transformers/26561bc9e840d8945f475d0d4c4b9df32025eadd79894b867b570cb1d09e67a9.3817cc1260a6b941b17af62b4f2a942b9825f209d8e2eed99e79e96f85f59aab
INFO:transformers.configuration_utils:Model config T5Config {
  "architectures": [
    "T5WithLMHeadModel"
  ],
  "d_ff": 2048,
  "d_kv": 64,
  "d_model": 512,
  "decoder_start_token_id": 0,
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_heads": 8,
  "num_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0

# Comparando Top K

In [None]:
# Custom top-k decoder (k=10) loaded from the same checkpoint.
top_k = Top_k(k=10).to(device)
ckpt = torch.load('/content/drive/My Drive/Colab Notebooks/Semana10/epoch=3.ckpt', map_location=device)
top_k.load_state_dict(ckpt['state_dict'])
# The decoded ids are not stored or displayed (not the cell's last expression).
top_k.topk_decode(x.to(device), mask.to(device), max_seq_len=128)
# NOTE(review): `Top_k` as defined in this notebook is a plain
# `torch.nn.Module` with no `test_step`/`test_dataloader`; the recorded
# output below presumably came from a different class definition — confirm.
trainer.test(top_k)

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

['Deste modo, a vida civil de uma nação amadurece, fazendo com que todos os cidadãos gozem dos frutos da tolerância genuína e do respeito mútuo.', '1999 XIII. Winnipeg, Canadá 23 de julho a 8 de agosto', 'No mistério do Natal, a luz de Cristo irradia-se sobre a terra, difundindo-se como círculos concêntricos.', 'e tem o objetivo de viabilizar a perfuração de dois novos furos no ocidente da citada península.']
['E desta maneira, a vida civil da nacao mature, possivel de todos os cidadaos de saber as frutas de tolerancia verdadeira e de respecto reciproco.', '1999 XIII. Winnipeg, Canada', 'No misterio de Novem, a luz de Cristo brilha na terra, spreading, sem como fora, em cercos centros.', 'faz vivavel perfurar dois novos escadas na ocidental do aquilo.']
--------------------------------------------------------------------------------
TEST RESULTS
{'test_bleu': 14.378540709075402}
--------------------------------------------------------------------------------



In [None]:
# Hugging Face
deterministic()
# `generate` belongs to the wrapped T5 model, not to the Lightning module, and
# its second positional parameter is `max_length`, so the mask must be passed
# by keyword. `max_seq_len` is the length used by the decoding cells above and
# 10 is the same k given to the custom `Top_k` decoder in this comparison.
model.to(device)
model.model.generate(input_ids=x.to(device), attention_mask=mask.to(device),
                     max_length=max_seq_len, do_sample=True, top_k=10)
# NOTE(review): `trainer.test` goes through `T5Finetuner.forward`, which calls
# `generate` without sampling arguments, so the BLEU reported below does not
# appear to use top-k sampling — confirm.
trainer.test(model)

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

['Deste modo, a vida civil de uma nação amadurece, fazendo com que todos os cidadãos gozem dos frutos da tolerância genuína e do respeito mútuo.', '1999 XIII. Winnipeg, Canadá 23 de julho a 8 de agosto', 'No mistério do Natal, a luz de Cristo irradia-se sobre a terra, difundindo-se como círculos concêntricos.', 'e tem o objetivo de viabilizar a perfuração de dois novos furos no ocidente da citada península.']
['E desta maneira, a vida civil da nacao mature, possivel de todos os cidadaos de saber as frutas de tolerancia verdadeira e de respecto reciproco.', '1999 XIII. Winnipeg, Canada', 'No misterio de Novem, a luz de Cristo brilha na terra, spreading, sem como fora, em cercos centros.', 'faz vivavel perfurar dois novos escadas na ocidental do aquilo.']
--------------------------------------------------------------------------------
TEST RESULTS
{'test_bleu': 14.399085808196498}
--------------------------------------------------------------------------------



# Comparando Nucleus Sampling - Top-p

In [None]:
# Custom nucleus decoder (p=0.95) loaded from the same checkpoint.
top_p = Top_p(p=0.95, min_samples=1).to(device)
ckpt = torch.load('/content/drive/My Drive/Colab Notebooks/Semana10/epoch=3.ckpt', map_location=device)
top_p.load_state_dict(ckpt['state_dict'])
# The decoded ids are not stored or displayed (not the cell's last expression).
top_p.topp_decode(x.to(device), mask.to(device), max_seq_len=128)
# NOTE(review): `Top_p` as defined in this notebook is a plain
# `torch.nn.Module` with no `test_step`/`test_dataloader`; the recorded
# output below presumably came from a different class definition — confirm.
trainer.test(top_p)

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

['Deste modo, a vida civil de uma nação amadurece, fazendo com que todos os cidadãos gozem dos frutos da tolerância genuína e do respeito mútuo.', '1999 XIII. Winnipeg, Canadá 23 de julho a 8 de agosto', 'No mistério do Natal, a luz de Cristo irradia-se sobre a terra, difundindo-se como círculos concêntricos.', 'e tem o objetivo de viabilizar a perfuração de dois novos furos no ocidente da citada península.']
['E desta maneira, a vida civil da nacao mature, possivel de todos os cidadaos de saber as frutas de tolerancia verdadeira e de respecto reciproco.', '1999 XIII. Winnipeg, Canada', 'No misterio de Novem, a luz de Cristo brilha na terra, spreading, sem como fora, em cercos centros.', 'faz vivavel perfurar dois novos foras no estado na dois ocidentais do qualquer presido.']
--------------------------------------------------------------------------------
TEST RESULTS
{'test_bleu': 14.819372391368573}
--------------------------------------------------------------------------------



In [None]:
# Hugging Face
deterministic()
# `generate` belongs to the wrapped T5 model, not to the Lightning module, and
# its second positional parameter is `max_length`, so the mask must be passed
# by keyword. `max_seq_len` is the length used by the decoding cells above and
# 0.95 is the same p given to the custom `Top_p` decoder in this comparison.
model.to(device)
model.model.generate(input_ids=x.to(device), attention_mask=mask.to(device),
                     max_length=max_seq_len, do_sample=True, top_p=0.95)
# NOTE(review): `trainer.test` goes through `T5Finetuner.forward`, which calls
# `generate` without sampling arguments, so the BLEU reported below does not
# appear to use nucleus sampling — confirm.
trainer.test(model)

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

['Deste modo, a vida civil de uma nação amadurece, fazendo com que todos os cidadãos gozem dos frutos da tolerância genuína e do respeito mútuo.', '1999 XIII. Winnipeg, Canadá 23 de julho a 8 de agosto', 'No mistério do Natal, a luz de Cristo irradia-se sobre a terra, difundindo-se como círculos concêntricos.', 'e tem o objetivo de viabilizar a perfuração de dois novos furos no ocidente da citada península.']
['E desta forma, a vida civil de uma nacao matura, perfeito a que todos os cidadaos servem a obras de lămurrio verdadeira toleracao e de respecto mutual.', '1999 XIII. Winnipeg, Canada', 'No misterio de Novembro, a luz de Cristo parva na terra, immerndo, como estava, em cercos centricos.', 'fazendo viual seiar duas novos pei ⁇ res na oeste daquil deda prekaria.']
--------------------------------------------------------------------------------
TEST RESULTS
{'test_bleu': 14.997882156408957}
--------------------------------------------------------------------------------



# <span style="color:purple">Fim do notebook</span>