# Finetune Pix2Struct model on Pix2Code HTML Lorem Ipsum dataset

## Setup Environment

In [None]:
!pip install transformers==4.33.1

Collecting transformers==4.33.1
  Downloading transformers-4.33.1-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m63.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers==4.33.1)
  Downloading huggingface_hub-0.17.2-py3-none-any.whl (294 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.9/294.9 kB[0m [31m32.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.33.1)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m105.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers==4.33.1)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m 

In [None]:
#!pip install --upgrade git+https://github.com/huggingface/transformers

In [None]:
!pip install -q wandb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.0/190.0 kB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.8/224.8 kB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


## Import necessary libraries

In [None]:
from google.colab import drive
import os
import zipfile
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import re
from transformers import Pix2StructForConditionalGeneration, AutoProcessor
import torch
from torch.nn import functional as F
from transformers.optimization import Adafactor, get_cosine_schedule_with_warmup
from pathlib import Path
from nltk import edit_distance
import numpy as np
import wandb
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu, sentence_bleu, SmoothingFunction
from torch.utils.data import random_split
import random

## Define variables and parameters

In [None]:
# --- Paths (Google Drive inputs/outputs and local extraction target) ---
G_DRIVE_FOLDER_DATASET = '/content/drive/MyDrive/Datasets/'
G_DRIVE_FOLDER_CHECKPOINTS = '/content/drive/MyDrive/Checkpoints/'
DATASET_NAME = 'pix2code_web_with_html_loremipsum'
ZIP_NAME = DATASET_NAME + '.zip'
DESTINATION_FOLDER= '/content/data/'
DATASET_FOLDER = DESTINATION_FOLDER + 'web_with_html_loremipsum/' # unzipped name

# Used for the wandb run name and for checkpoint file names
EXPERIMENT_NAME = "Pix2Struct_Pix2Code_HTML_LI"

# Maximum number of target tokens per sample (labels are padded/truncated to this length)
MAX_SENTENCE_LEN = 1024

# Maximum number of image patches passed to the Pix2Struct processor
MAX_PATCHES = 1024

DEBUG = False
VERBOSE = True

# --- Optimisation hyper-parameters ---
BATCH_SIZE = 4
NUM_WARMUP_STEPS = 500
MAX_EPOCHS = 10
LR = 1e-4
CHECK_VAL_EVERY_N_EPOCH = 5
GRADIENT_CLIP_VAL = 1.0
# Number of batches per optimizer step, targeting an effective batch size of 8.
# Integer division keeps this a whole batch count (plain `/` produced the float 2.0).
ACCUMULATE_GRAD_BATCHES = max(1, 8 // BATCH_SIZE)

# --- Dataset split ---
TRAIN_SET_PERCENTAGE = 0.89
VALID_SET_PERCENTAGE = 0.01
# The test set is whatever remains: 1 - TRAIN_SET_PERCENTAGE - VALID_SET_PERCENTAGE


RANDOM_SEED = 100

# --- Warm start from a previously saved checkpoint ---
LOAD_FROM_CHECKPOINT = True
LAST_CHECKPOINT_NAME = "FINAL_Pix2Struct_Pix2Code_epoch[19]_bleu[0.98].pth"

In [None]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


## Load Pix2Code Dataset

### Mount Google Drive

In [None]:
drive.mount('/content/drive')

Mounted at /content/drive


### Import zip file from Google Drive

In [None]:
# Make sure the local target folder exists, then unpack the dataset archive from Drive into it
os.makedirs(DESTINATION_FOLDER, exist_ok=True)

archive_path = G_DRIVE_FOLDER_DATASET + ZIP_NAME
with zipfile.ZipFile(archive_path, "r") as zf:
    zf.extractall(path=DESTINATION_FOLDER)

## Load Model and Processor

In [None]:
repo_id = "google/pix2struct-base"

processor = AutoProcessor.from_pretrained(repo_id)
model = Pix2StructForConditionalGeneration.from_pretrained(repo_id, is_encoder_decoder=True)

Downloading (…)rocessor_config.json:   0%|          | 0.00/231 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.61k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/851k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/3.27M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/4.92k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.13G [00:00<?, ?B/s]

In [None]:
if LOAD_FROM_CHECKPOINT:
    print("Loading model from checkpoint:", LAST_CHECKPOINT_NAME)
    # map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only runtime and
    # avoids keeping a second copy of the weights on the GPU while `checkpoint` is alive.
    checkpoint = torch.load(G_DRIVE_FOLDER_CHECKPOINTS + LAST_CHECKPOINT_NAME, map_location="cpu")

    # Load only model weights from training on Pix2Code guis.
    # Accept both a bare state_dict and the full training checkpoint written by
    # save_checkpoint, which nests the weights under "model_state_dict".
    state_dict = checkpoint["model_state_dict"] if "model_state_dict" in checkpoint else checkpoint
    model.load_state_dict(state_dict)


Loading model from checkpoint: FINAL_Pix2Struct_Pix2Code_epoch[19]_bleu[0.98].pth


## Create Dataset class

In [None]:
def preprocess_html_file(html_text):
    """Strip boilerplate sections from an HTML document and flatten its whitespace.

    Removes every ``<header>...</header>``, ``<footer class="footer">...</footer>``
    and ``<script ...</script>`` section (non-greedy, spanning lines), then collapses
    each run of whitespace, newlines included, into a single space.

    Args:
        html_text: Raw HTML source as a string.

    Returns:
        The cleaned, single-line HTML string.
    """
    boilerplate_patterns = (
        r'<header>.*?</header>',
        r'<footer class="footer">.*?</footer>',
        r'<script .*?</script>',
    )

    cleaned = html_text
    for pattern in boilerplate_patterns:
        cleaned = re.sub(pattern, '', cleaned, flags=re.DOTALL)

    # \s+ already matches newlines, so one pass flattens line breaks and repeated spaces
    return re.sub(r'\s+', ' ', cleaned)

### Keep files with at most 1024 tokens and add new unknown tokens

In [None]:
# Every entry found directly inside the dataset folder
files = os.listdir(DATASET_FOLDER)

# Keep just the HTML sources
all_html_files = list(filter(lambda name: name.endswith('.html'), files))

In [None]:
# Tokenize every HTML file once in order to:
#   * record the longest tokenized target (max_length),
#   * keep only the files that fit in MAX_SENTENCE_LEN tokens,
#   * collect the token strings seen in the kept files.
max_length = 0

bigger_than_1024 = 0
lower_than_1024 = 0

html_files_filtered = []

tokens_to_add = set()

for html_file_path in all_html_files:
    with open(os.path.join(DATASET_FOLDER, html_file_path), "r", encoding="utf-8") as reader:
        preprocessed_text = preprocess_html_file(reader.read())

    splitted_text = processor.tokenizer(preprocessed_text).tokens()
    max_length = max(max_length, len(splitted_text))

    if len(splitted_text) > MAX_SENTENCE_LEN:
        bigger_than_1024 += 1
    else:
        lower_than_1024 += 1
        html_files_filtered.append(html_file_path)
        # In-place update avoids rebuilding the whole set for every file
        tokens_to_add.update(splitted_text)

print("bigger_than_1024= ", bigger_than_1024)
print("lower_than_1024= ", lower_than_1024)

# NOTE(review): the candidates are tokens the tokenizer itself produced, so they are
# presumably already in its vocabulary and this call is expected to add nothing.
newly_added_num = processor.tokenizer.add_tokens(list(tokens_to_add))
print(f"Number of new tokens = {newly_added_num}")

# Resize the model's token embeddings if there are new tokens
if newly_added_num > 0:
    model.decoder.resize_token_embeddings(len(processor.tokenizer), pad_to_multiple_of=8)

bigger_than_1024=  0
lower_than_1024=  1742
Number of new tokens = 0


In [None]:
print(len(html_files_filtered))

1742


In [None]:
# os.listdir returns entries in arbitrary order, so sort before shuffling: this way the
# seed alone determines the split. It also makes the cell idempotent, because re-running
# it no longer reshuffles an already shuffled list.
html_files_filtered.sort()

random.seed(RANDOM_SEED)

# Use the same seed, so that parts remain the same
random.shuffle(html_files_filtered)

train_len = int(TRAIN_SET_PERCENTAGE * len(html_files_filtered))
valid_len = int(VALID_SET_PERCENTAGE * len(html_files_filtered))

# Consecutive slices of the shuffled list; the test set takes everything that is left
train_paths = html_files_filtered[:train_len]
valid_paths = html_files_filtered[train_len:train_len+valid_len]
test_paths = html_files_filtered[train_len+valid_len:]

print(f"TRAIN_SET size = {len(train_paths)}")
print(f"VALID_SET size = {len(valid_paths)}")
print(f"TEST_SET size = {len(test_paths)}")

TRAIN_SET size = 1550
VALID_SET size = 17
TEST_SET size = 175


In [None]:
class Pix2CodeDataset(Dataset):
    """Screenshot/HTML pairs, fully pre-encoded and kept in memory.

    Every sample is encoded once in the constructor using the module-level
    ``processor``; ``__getitem__`` only returns the cached dict, which holds the
    processor's image encoding plus a ``labels`` tensor.

    Args:
        root_dir: Folder containing the ``.html`` files and their same-named ``.png`` screenshots.
        transform: Callable applied to the RGB PIL image before the processor; pass a falsy
            value to skip it.
        text_files_paths: Names (relative to ``root_dir``) of the HTML files to load.
    """

    def __init__(self, root_dir, transform, text_files_paths):

        self.root_dir = root_dir
        self.transform = transform
        self.text_files_paths = text_files_paths

        self.max_patches = MAX_PATCHES
        self.max_length = MAX_SENTENCE_LEN
        self.ignore_id = -100  # label value used to mask padding positions

        self.encodings = []

        for text_file in tqdm(text_files_paths):
            # Swap only the extension; str.replace would also rewrite a '.html' inside the name
            image_file = os.path.splitext(text_file)[0] + '.png'

            # Directly process the text files, and save them in the ram
            # Do the same also for images, if there is enough space in memory
            text_file_path = os.path.join(root_dir, text_file)
            image_file_path = os.path.join(root_dir, image_file)

            # Load image; the context manager closes the file once the RGB copy exists
            with Image.open(image_file_path) as raw_image:
                image = raw_image.convert('RGB')

            if DEBUG:
                image.show()

            if self.transform:
                image = self.transform(image)

            encoding = processor(images=image, max_patches=self.max_patches, return_tensors="pt")
            # squeeze(0) drops only the leading batch axis; a bare squeeze() would also
            # collapse any other axis that happens to have size 1
            encoding = {k: v.squeeze(0) for k, v in encoding.items()}

            # Load text
            with open(text_file_path, 'r', encoding='utf-8') as f:
                text = f.read()
            text_cleaned = preprocess_html_file(text)

            if DEBUG:
                print("text:")
                print(text)
                print("\n\n\ntext_cleaned:")
                print(text_cleaned)

            input_ids = processor.tokenizer(
                text_cleaned,
                max_length=self.max_length,
                padding="max_length",
                truncation=True,
                return_tensors="pt",
            ).input_ids

            labels = input_ids.squeeze(0).clone()
            labels[labels == processor.tokenizer.pad_token_id] = self.ignore_id  # model doesn't need to predict pad token

            encoding["labels"] = labels

            # For each sample save directly the encoding of both text and image
            self.encodings.append(encoding)

    def __len__(self):
        """Return the number of pre-encoded samples."""
        return len(self.encodings)

    def __getitem__(self, idx):
        """Return the cached encoding dict of sample ``idx``."""
        return self.encodings[idx]

In [None]:
# Image preprocessing applied to each screenshot before it reaches the processor:
# PIL image -> tensor, then per-channel normalization with the listed mean/std.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Build (and fully pre-encode) the training and validation datasets
train_dataset = Pix2CodeDataset(root_dir=DATASET_FOLDER, transform=transform, text_files_paths=train_paths)
val_dataset = Pix2CodeDataset(root_dir=DATASET_FOLDER, transform=transform, text_files_paths=valid_paths)

# Batch the datasets; only the training data is shuffled.
# Validation uses a fixed batch size of 10.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=10, shuffle=False)

100%|██████████| 1550/1550 [01:25<00:00, 18.22it/s]
100%|██████████| 17/17 [00:00<00:00, 19.03it/s]


In [None]:
print(f"train_dataloader size = {len(train_dataloader)}")
print(f"val_dataloader size = {len(val_dataloader)}")

train_dataloader size = 388
val_dataloader size = 2


In [None]:
batch = next(iter(train_dataloader))
encoding = batch

In [None]:
encoding

{'flattened_patches': tensor([[[ 1.0000,  1.0000, -0.0395,  ..., -0.0395,  0.2814,  0.6590],
          [ 1.0000,  2.0000, -0.0395,  ..., -6.2786, -6.0970, -5.6910],
          [ 1.0000,  3.0000, -0.0395,  ..., -6.2344, -5.8883, -5.3066],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
 
         [[ 1.0000,  1.0000, -0.0369,  ..., -0.0369,  0.2828,  0.6590],
          [ 1.0000,  2.0000, -0.0369,  ..., -6.2522, -6.0713, -5.6669],
          [ 1.0000,  3.0000, -0.0369,  ..., -6.2386, -6.0167, -5.5677],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
 
         [[ 1.0000,  1.0000, -0.1393,  ..., -0.1393,  0.1879,  0.5730],
       

In [None]:
encoding["flattened_patches"][0]

tensor([[ 1.0000,  1.0000, -0.0395,  ..., -0.0395,  0.2814,  0.6590],
        [ 1.0000,  2.0000, -0.0395,  ..., -6.2786, -6.0970, -5.6910],
        [ 1.0000,  3.0000, -0.0395,  ..., -6.2344, -5.8883, -5.3066],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])

In [None]:
encoding["labels"]

tensor([[50190, 50227,   411,  ...,  -100,  -100,  -100],
        [50190, 50227,   411,  ...,  -100,  -100,  -100],
        [50190, 50227,   411,  ...,  -100,  -100,  -100],
        [50190, 50227,   411,  ...,  -100,  -100,  -100]])

In [None]:
encoding["labels"][0]

tensor([50190, 50227,   411,  ...,  -100,  -100,  -100])

In [None]:
# Token ids of the first sample in the batch, as a plain Python list
labels_list = encoding["labels"][0].tolist()

# -100 marks masked (padding) positions and is not a real token id, so drop it
filtered_labels = list(filter(lambda token: token != -100, labels_list))

# Decode the single remaining sequence back to text
(decoded_text_example,) = processor.tokenizer.batch_decode(
    [filtered_labels],
    skip_special_tokens=True,
)


In [None]:
decoded_text_example



In [None]:
for k,v in encoding.items():
    print(k,v.shape)

flattened_patches torch.Size([4, 1024, 770])
attention_mask torch.Size([4, 1024])
labels torch.Size([4, 1024])


## Training

In [None]:
# Both constants hold the tokenizer's pad token id
PAD_TOKEN_ID = processor.tokenizer.pad_token_id
START_TOKEN_ID = PAD_TOKEN_ID

In [None]:
def move_to_device(data):
    """Recursively move every tensor inside ``data`` to ``DEVICE``.

    Dicts are rebuilt with the same keys, lists and tuples are both rebuilt as
    lists, tensors are moved with ``.to(DEVICE)``, and any other value is
    returned unchanged.
    """
    if isinstance(data, dict):
        return {key: move_to_device(value) for key, value in data.items()}

    if isinstance(data, (list, tuple)):
        return list(map(move_to_device, data))

    if isinstance(data, torch.Tensor):
        return data.to(DEVICE)

    return data

### Main training function

In [None]:
def train_model(config, processor, model, train_dataloader, val_dataloader):
    """Fine-tune ``model`` and validate it periodically, logging to Weights & Biases.

    For every epoch this runs ``training_loop``, logs the moving-average loss and the
    current learning rates, and saves a checkpoint. Validation (``testing_loop``) runs
    on the first epoch, on the last epoch and every ``check_val_every_n_epoch`` epochs.

    Args:
        config: Dict of hyper-parameters; reads ``lr``, ``max_epochs``,
            ``num_warmup_steps``, ``check_val_every_n_epoch`` and ``verbose`` here and is
            also passed on to ``training_loop`` / ``testing_loop``.
        processor: Processor forwarded to ``testing_loop`` for decoding.
        model: Model to train; moved to ``DEVICE``.
        train_dataloader: Batches used for training.
        val_dataloader: Batches used for validation.
    """
    # Extract configuration values
    lr = config.get("lr")
    max_epochs = config.get("max_epochs")
    num_warmup_steps = config.get("num_warmup_steps")
    check_val_every_n_epoch = config.get("check_val_every_n_epoch")
    verbose = config.get("verbose", False)

    model.to(DEVICE)

    optimizer = Adafactor(model.parameters(), scale_parameter=False, relative_step=False, lr=lr, weight_decay=1e-05)

    # training_loop advances the scheduler once per batch, so the schedule has to span
    # every batch of every epoch
    total_steps = max_epochs * len(train_dataloader)
    scheduler = get_cosine_schedule_with_warmup(optimizer,
                                                num_warmup_steps=num_warmup_steps,
                                                num_training_steps=total_steps)

    global_step = 0  # to keep track of total steps
    epoch_start = 0
    epoch_last = max_epochs - 1

    wandb.init(project="Pix2Struct", name="run-" + EXPERIMENT_NAME, config=config)

    # try/finally so the wandb run is closed even if training raises or is interrupted
    try:
        for epoch in range(epoch_start, max_epochs):
            global_step, moving_avg_loss = training_loop(epoch, train_dataloader, model, config, optimizer, scheduler, global_step, epoch_last)

            if verbose:
                print(f"Moving Avg Loss: {moving_avg_loss:.3f}")

            wandb.log({"moving_avg_loss": moving_avg_loss, **{f'lr_{i}': param_group['lr'] for i, param_group in enumerate(optimizer.param_groups)}})

            # Save a checkpoint after every training epoch (before any validation of that epoch)
            save_checkpoint(model, optimizer, scheduler, epoch, global_step, wandb.run.id, EXPERIMENT_NAME, G_DRIVE_FOLDER_CHECKPOINTS)

            is_validation_epoch = (
                epoch == 0
                or epoch == epoch_last
                or (epoch + 1) % check_val_every_n_epoch == 0
            )
            if is_validation_epoch:
                avg_bleu_score = testing_loop(val_dataloader, model, processor, config, f"Epoch {epoch}/{epoch_last} - valid loop")

                if verbose:
                    print(f" Avg Bleu Score: {avg_bleu_score:.2f}")

                wandb.log({"bleu": avg_bleu_score})
    finally:
        wandb.finish()

In [None]:
def training_loop(epoch, train_dataloader, model, config, optimizer, scheduler, global_step, epoch_last):
    """Train ``model`` for one epoch with gradient accumulation.

    Args:
        epoch: Index of the current epoch (used for the progress-bar label).
        train_dataloader: Yields dict batches with ``labels``, ``flattened_patches``
            and ``attention_mask``.
        model: Model being trained.
        config: Dict; reads ``accumulate_grad_batches`` (default 1) and ``gradient_clip_val``.
        optimizer: Optimizer stepped once per accumulation group.
        scheduler: Learning-rate scheduler, stepped once per batch.
        global_step: Number of batches processed before this epoch.
        epoch_last: Index of the final epoch (used for the progress-bar label).

    Returns:
        Tuple ``(global_step, moving_avg_loss)``: the updated batch counter and the
        exponentially smoothed loss of this epoch (0 if the dataloader is empty).
    """
    model.train()
    num_batches = len(train_dataloader)
    train_loop = tqdm(enumerate(train_dataloader), total=num_batches, desc=f"Epoch {epoch}/{epoch_last} - train loop")

    # Extract configuration values.
    # The accumulation factor must be a whole number of batches (config may hold a float like 2.0).
    accumulate_grad_batches = max(1, int(config.get('accumulate_grad_batches', 1)))
    gradient_clip_val = config.get("gradient_clip_val")

    moving_avg_loss = None  # None until the first batch has been seen
    alpha = 0.1 # Smoothing factor

    # Start the epoch from clean gradients
    optimizer.zero_grad()

    for step, batch in train_loop:
        encoding = move_to_device(batch)
        labels, flattened_patches, attention_mask = encoding["labels"], encoding["flattened_patches"], encoding["attention_mask"]

        outputs = model(labels=labels, flattened_patches=flattened_patches, attention_mask=attention_mask)
        loss = outputs.loss
        # Scale so the accumulated gradient is the mean over the group, not the sum
        (loss / accumulate_grad_batches).backward()

        # Step after every full group of batches (counted from 1, so the first step does not
        # fire after a single batch) and flush whatever is left on the last batch of the epoch
        is_group_complete = (step + 1) % accumulate_grad_batches == 0
        if is_group_complete or step == num_batches - 1:
            if gradient_clip_val:
                torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clip_val)
            optimizer.step()
            optimizer.zero_grad()

        # Report the unscaled per-batch loss
        loss_value = loss.item()

        # Update the progress bar
        train_loop.set_postfix({'loss': loss_value}, refresh=True)

        # The scheduler is advanced per batch, matching a schedule sized in batches
        scheduler.step()
        global_step += 1

        # Update the moving average loss
        if moving_avg_loss is None:
            moving_avg_loss = loss_value
        else:
            moving_avg_loss = alpha * loss_value + (1 - alpha) * moving_avg_loss

        # Log Loss after each step
        wandb.log({"loss": loss_value})

    return global_step, (0 if moving_avg_loss is None else moving_avg_loss)

In [None]:
def testing_loop(testing_dataloader, model, processor, config, description):
    """Generate predictions for every batch and score them against the labels with BLEU.

    Args:
        testing_dataloader: Yields dict batches with ``labels``, ``flattened_patches``
            and ``attention_mask``.
        model: Model used for generation; switched to eval mode.
        processor: Provides the tokenizer used to decode predictions and labels.
        config: Dict; ``verbose`` enables printing of each prediction/answer/score.
        description: Label shown on the progress bar.

    Returns:
        Mean BLEU score over all evaluated samples (0.0 if the dataloader is empty).
    """
    model.eval()
    bleu_scores = []
    avg_bleu_score = 0.0  # defined up front so an empty dataloader does not raise
    verbose = config.get("verbose", False)
    smoothing = SmoothingFunction().method4  # built once instead of once per sample
    pad_token_id = processor.tokenizer.pad_token_id

    with torch.no_grad():
        test_loop = tqdm(enumerate(testing_dataloader), total=len(testing_dataloader), desc=description)
        for i, batch in test_loop:
            encoding = move_to_device(batch)
            labels, flattened_patches, attention_mask = encoding["labels"], encoding["flattened_patches"], encoding["attention_mask"]

            outputs = model.generate(flattened_patches=flattened_patches, attention_mask=attention_mask, max_new_tokens=MAX_SENTENCE_LEN)

            predictions = processor.tokenizer.batch_decode(outputs, skip_special_tokens=True)

            # Turn the -100 loss mask back into pad tokens on a copy, leaving the batch untouched
            labels = labels.clone()
            labels[labels == -100] = pad_token_id
            answers = processor.tokenizer.batch_decode(labels, skip_special_tokens=True)

            # NOTE(review): pred/answer are plain strings, so BLEU n-grams are presumably built
            # from characters rather than words; kept as-is so scores stay comparable with earlier runs.
            batch_scores = [
                corpus_bleu([[answer]], [pred], smoothing_function=smoothing)
                for pred, answer in zip(predictions, answers)
            ]
            bleu_scores.extend(batch_scores)

            avg_bleu_score = np.mean(bleu_scores)
            test_loop.set_postfix(bleu_score=avg_bleu_score)

            if verbose:
                # Pair each sample with its own score (not with the cumulative score list)
                for pred, answer, bleu_score in zip(predictions, answers, batch_scores):
                    tqdm.write(f"\nPrediction: {pred}\n    Answer: {answer}\n      Bleu: {bleu_score:.2f}")

    return avg_bleu_score


In [None]:
def save_checkpoint(model, optimizer, scheduler, epoch, global_step, wandb_run_id, experiment_name, folder_path):
    """Write a full training checkpoint to ``folder_path``.

    The file is named ``{experiment_name}_epoch[{epoch}].pth`` and holds a dict with
    the model, optimizer and scheduler state dicts plus ``epoch``, ``global_step``
    and ``wandb_run_id``.

    Args:
        model: Object exposing ``state_dict()``.
        optimizer: Object exposing ``state_dict()``.
        scheduler: Object exposing ``state_dict()``.
        epoch: Epoch index stored in the checkpoint and used in the file name.
        global_step: Step counter stored in the checkpoint.
        wandb_run_id: Weights & Biases run id stored in the checkpoint.
        experiment_name: Prefix of the checkpoint file name.
        folder_path: Destination folder; created if it does not exist.
    """
    checkpoint = {
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "scheduler_state_dict": scheduler.state_dict(),
        "epoch": epoch,
        "global_step": global_step,
        'wandb_run_id': wandb_run_id
    }
    model_name = f"{experiment_name}_epoch[{epoch}].pth"

    # Create the folder up front so a missing directory does not fail the save after an epoch of training
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)
    torch.save(checkpoint, os.path.join(folder_path, model_name))


In [None]:
# Hyper-parameters for the training run, gathered from the constants defined above
config = dict(
    batch_size=BATCH_SIZE,
    num_warmup_steps=NUM_WARMUP_STEPS,
    max_epochs=MAX_EPOCHS,
    lr=LR,
    check_val_every_n_epoch=CHECK_VAL_EVERY_N_EPOCH,
    gradient_clip_val=GRADIENT_CLIP_VAL,
    accumulate_grad_batches=ACCUMULATE_GRAD_BATCHES,
    verbose=VERBOSE,
)

In [None]:
def validate_config(config):
    """Check that a training configuration is complete and within the expected ranges.

    All required keys are checked for presence first (in a fixed order), then each
    value is range-checked in the same order; the first problem found is reported.

    Args:
        config: Mapping of hyper-parameter names to values.

    Raises:
        ValueError: If a required key is missing or a value is out of range.
    """
    # (key, predicate that is True for an invalid value, error message)
    rules = (
        ("batch_size", lambda v: v <= 0, "batch_size must be positive."),
        ("num_warmup_steps", lambda v: v < 0, "num_warmup_steps must be non-negative."),
        ("max_epochs", lambda v: v <= 0, "max_epochs must be positive."),
        ("lr", lambda v: v <= 0, "Learning rate must be positive."),
        ("check_val_every_n_epoch", lambda v: v <= 0, "check_val_every_n_epoch must be positive."),
        ("gradient_clip_val", lambda v: v < 0, "gradient_clip_val must be non-negative."),
        ("accumulate_grad_batches", lambda v: v <= 0, "accumulate_grad_batches must be positive."),
        ("verbose", lambda v: not isinstance(v, bool), "verbose must be a boolean value."),
    )

    # Presence of every key is verified before any value is inspected
    for name, _, _ in rules:
        if name not in config:
            raise ValueError(f"Key '{name}' must be present in the configuration.")

    for name, is_invalid, message in rules:
        if is_invalid(config[name]):
            raise ValueError(message)


In [None]:
validate_config(config)
print(config)

{'batch_size': 4, 'num_warmup_steps': 500, 'max_epochs': 10, 'lr': 0.0001, 'check_val_every_n_epoch': 5, 'gradient_clip_val': 1.0, 'accumulate_grad_batches': 2.0, 'verbose': True}


In [None]:
train_model(config, processor, model, train_dataloader, val_dataloader)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch 0/9 - train loop: 100%|██████████| 388/388 [05:02<00:00,  1.28it/s, loss=0.836]


Moving Avg Loss: 0.849


Epoch 0/9 - valid loop:  50%|█████     | 1/2 [03:01<03:01, 181.00s/it, bleu_score=0.431]


      Bleu: 0.42

Prediction: <html> <body> <main class="container"> <div class="header clearfix"> <nav> <ul class="nav nav-pills pull-left"> <li><a href="#">Enim qui</a></li> <li><a href="#">Enim qui</a></li> <li><a href="#">Enim qui</a></li> </ul> </nav> </div> <div class="row"><div class="col-lg-3"> <h4>Enim</h4><p>Enim qui aute qui aute qui aliqua aliqua aliqua</p> <a class="btn btn-danger" href="#" role="button">Enim qui</a> </div> <div class="col-lg-3"> <h4>Enim</h4><p>Enim qui aliqua aliqua aliqua aliqua aliqua</p> <a class="btn btn-danger" href="#" role="button">Enim qui</a> </div> <div class="col-lg-3"> <h4>Enim</h4><p>Enim aliqua aliqua aliqua aliqua aliqua</p> <a class="btn btn-danger" href="#" role="button">Enim qui</a> </div> <div class="col-lg-3"> <h4>Enim</h4><p>Enim aliqua aliqua aliqua aliqua aliqua</p> <a class="btn btn-danger" href="#" role="button">Enim qui</a> </div> <div class="col-lg-3"> <h4>Enim</h4><p>Enim aliqua aliqua aliqua aliqua aliqua</p> <a class="btn b

Epoch 0/9 - valid loop: 100%|██████████| 2/2 [05:09<00:00, 154.79s/it, bleu_score=0.43]



      Bleu: 0.42

Prediction: <html> <body> <main class="container"> <div class="header clearfix"> <nav> <ul class="nav nav-pills pull-left"> <li><a href="#">Enim qui</a></li> <li><a href="#">Enim qui</a></li> <li><a href="#">Enim qui</a></li> </ul> </nav> </div> <div class="row"><div class="col-lg-3"> <h4>Enim</h4><p>Enim qui aute qui aute qui aliqua aliqua aliqua</p> <a class="btn btn-success" href="#" role="button">Enim qui</a> </div> <div class="col-lg-3"> <h4>Enim</h4><p>Enim qui aliqua aliqua aliqua aliqua aliqua</p> <a class="btn btn-success" href="#" role="button">Enim qui</a> </div> <div class="col-lg-3"> <h4>Enim</h4><p>Enim aliqua aliqua aliqua aliqua aliqua</p> <a class="btn btn-success" href="#" role="button">Enim qui</a> </div> <div class="col-lg-3"> <h4>Enim</h4><p>Enim aliqua aliqua aliqua aliqua aliqua</p> <a class="btn btn-success" href="#" role="button">Enim qui</a> </div> <div class="col-lg-3"> <h4>Enim</h4><p>Enim aliqua aliqua aliqua aliqua aliqua</p> <a class="b

Epoch 1/9 - train loop: 100%|██████████| 388/388 [04:58<00:00,  1.30it/s, loss=0.782]


Moving Avg Loss: 0.723


Epoch 2/9 - train loop: 100%|██████████| 388/388 [04:58<00:00,  1.30it/s, loss=0.652]


Moving Avg Loss: 0.650


Epoch 3/9 - train loop: 100%|██████████| 388/388 [04:58<00:00,  1.30it/s, loss=0.59]


Moving Avg Loss: 0.619


Epoch 4/9 - train loop: 100%|██████████| 388/388 [04:58<00:00,  1.30it/s, loss=0.508]


Moving Avg Loss: 0.580


Epoch 4/9 - valid loop:  50%|█████     | 1/2 [01:05<01:05, 65.95s/it, bleu_score=0.701]


      Bleu: 0.74

      Bleu: 0.66

      Bleu: 0.64

      Bleu: 0.69

Prediction: <html> <body> <main class="container"> <div class="header clearfix"> <nav> <ul class="nav nav-pills pull-left"> <li><a href="#">Eiusmod aliqua</a></li> <li><a href="#">Eiusmod aliqua</a></li> <li class="active"><a href="#">Eiusmod aliqua</a></li> </ul> </nav> </div> <div class="row"><div class="col-lg-6"> <h4>Minim</h4><p>Eiusmod aliqua aliqua commodo aliqua aliqua</p> <a class="btn btn-success" href="#" role="button">Eiusmod aliqua</a> </div> <div class="col-lg-6"> <h4>Do</h4><p>Eiusmod aliqua commodo aliqua aliqua commodo aliqua</p> <a class="btn btn-danger" href="#" role="button">Eiusmod aliqua</a> </div> </div> <div class="row"><div class="col-lg-12"> <h4>Veniam</h4><p>Eiusmod aliqua commodo aliqua aliqua commodo aliqua</p> <a class="btn btn-danger" href="#" role="button">Eiusmod aliqua</a> </div> </div> <div class="row"><div class="col-lg-3"> <h4>Et</h4><p>Eiusmod aliqua commodo aliqua aliqua comm

Epoch 4/9 - valid loop: 100%|██████████| 2/2 [01:54<00:00, 57.13s/it, bleu_score=0.714]



      Bleu: 0.74

      Bleu: 0.66

      Bleu: 0.64

      Bleu: 0.69

      Bleu: 0.74

      Bleu: 0.75

      Bleu: 0.73
 Avg Bleu Score: 0.71


Epoch 5/9 - train loop: 100%|██████████| 388/388 [04:59<00:00,  1.30it/s, loss=0.585]


Moving Avg Loss: 0.574


Epoch 6/9 - train loop: 100%|██████████| 388/388 [04:58<00:00,  1.30it/s, loss=0.582]


Moving Avg Loss: 0.571


Epoch 7/9 - train loop: 100%|██████████| 388/388 [04:59<00:00,  1.30it/s, loss=0.595]


Moving Avg Loss: 0.565


Epoch 8/9 - train loop: 100%|██████████| 388/388 [04:59<00:00,  1.30it/s, loss=0.578]


Moving Avg Loss: 0.557


Epoch 9/9 - train loop: 100%|██████████| 388/388 [04:58<00:00,  1.30it/s, loss=0.55]


Moving Avg Loss: 0.549


Epoch 9/9 - valid loop:  50%|█████     | 1/2 [01:15<01:15, 75.63s/it, bleu_score=0.766]


      Bleu: 0.77

      Bleu: 0.77

      Bleu: 0.75

      Bleu: 0.77

Prediction: <html> <body> <main class="container"> <div class="header clearfix"> <nav> <ul class="nav nav-pills pull-left"> <li class="active"><a href="#">Et aliquip</a></li> <li><a href="#">Et aliquip</a></li> <li><a href="#">Et aliquip</a></li> <li><a href="#">Et aliquip</a></li> </ul> </nav> </div> <div class="row"><div class="col-lg-6"> <h4>Minim</h4><p>Esse aliquip nostrud aliqua ullamco aliquip nostrud</p> <a class="btn btn-success" href="#" role="button">Et aliquip</a> </div> <div class="col-lg-6"> <h4>Do</h4><p>Esse aliquip nostrud aliqua ullamco aliquip nostrud</p> <a class="btn btn-danger" href="#" role="button">Et aliquip</a> </div> </div> <div class="row"><div class="col-lg-12"> <h4>Veniam</h4><p>Enim aliquip nostrud aliqua ullamco aliquip nostrud</p> <a class="btn btn-danger" href="#" role="button">Ea aliquip</a> </div> </div> <div class="row"><div class="col-lg-3"> <h4>Et</h4><p>Et aliquip nostrud al

Epoch 9/9 - valid loop: 100%|██████████| 2/2 [02:09<00:00, 64.69s/it, bleu_score=0.768]


      Bleu: 0.77

      Bleu: 0.77

      Bleu: 0.75

      Bleu: 0.77

      Bleu: 0.76

      Bleu: 0.78

      Bleu: 0.77
 Avg Bleu Score: 0.77





0,1
bleu,▁▇█
loss,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr_0,▇█▇▇▅▄▃▂▁▁
moving_avg_loss,█▅▃▃▂▂▂▁▁▁

0,1
bleu,0.76759
loss,0.54985
lr_0,0.0
moving_avg_loss,0.54912
