# Finetune Pix2Struct model on Pix2Code HTML dataset

## Setup Envirnoment

In [1]:
!pip install transformers==4.33.1

Collecting transformers==4.33.1
  Downloading transformers-4.33.1-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m53.8 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers==4.33.1)
  Downloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.0/295.0 kB[0m [31m36.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.33.1)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m107.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers==4.33.1)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m 

In [2]:
!pip install -q wandb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.0/190.0 kB[0m [31m24.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.8/224.8 kB[0m [31m25.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


## Import necessary libraries

In [3]:
from google.colab import drive
import os
import zipfile
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import re
from transformers import Pix2StructForConditionalGeneration, AutoProcessor
import torch
from torch.nn import functional as F
from transformers.optimization import Adafactor, get_cosine_schedule_with_warmup
from pathlib import Path
from nltk import edit_distance
import numpy as np
import wandb
from tqdm import tqdm
from nltk.translate.bleu_score import corpus_bleu, sentence_bleu, SmoothingFunction
from torch.utils.data import random_split
import random

## Define variables and parameters

In [4]:
G_DRIVE_FOLDER_DATASET = '/content/drive/MyDrive/Datasets/'
G_DRIVE_FOLDER_CHECKPOINTS = '/content/drive/MyDrive/Checkpoints/'
DATASET_NAME = 'pix2code_web_with_html'
ZIP_NAME = DATASET_NAME + '.zip'
DESTINATION_FOLDER= '/content/data/'
DATASET_FOLDER = DESTINATION_FOLDER + 'web_with_html/' # unzipped name

EXPERIMENT_NAME = "Pix2Struct_Pix2Code_HTML_FULL"

MAX_SENTENCE_LEN = 1024

MAX_PATCHES = 1024

DEBUG = False
VERBOSE = True

BATCH_SIZE = 4
NUM_WARMUP_STEPS = 500
MAX_EPOCHS = 10
LR = 1e-4
CHECK_VAL_EVERY_N_EPOCH = 5
GRADIENT_CLIP_VAL = 1.0
ACCUMULATE_GRAD_BATCHES = 8 / BATCH_SIZE

TRAIN_SET_PERCENTAGE = 0.89
VALID_SET_PERCENTAGE = 0.01
# TEST_SET_PERCENTAGE is 1 - TRAIN_SET_PERCENTAGE - VALID_SET_PERCENTAGE # Use 1000 for test


RANDOM_SEED = 100

LOAD_FROM_CHECKPOINT = False
LAST_CHECKPOINT_NAME = ""

In [5]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


## Load Pix2Code Dataset

### Mount Google Drive

In [6]:
drive.mount('/content/drive')

Mounted at /content/drive


### Import zip file from Google Drive

In [7]:
os.makedirs(DESTINATION_FOLDER, exist_ok=True)

with zipfile.ZipFile(G_DRIVE_FOLDER_DATASET + ZIP_NAME, "r") as zf:
  zf.extractall(DESTINATION_FOLDER)

## Load Model and Processor

In [8]:
repo_id = "google/pix2struct-base"

processor = AutoProcessor.from_pretrained(repo_id)
model = Pix2StructForConditionalGeneration.from_pretrained(repo_id, is_encoder_decoder=True)

Downloading (…)rocessor_config.json:   0%|          | 0.00/231 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.61k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/851k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/3.27M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/4.92k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.13G [00:00<?, ?B/s]

In [9]:
if LOAD_FROM_CHECKPOINT:
    print("Loading model from checkpoint:", LAST_CHECKPOINT_NAME)
    checkpoint = torch.load(G_DRIVE_FOLDER_CHECKPOINTS + LAST_CHECKPOINT_NAME)

    # Load only model weights from training on Pix2Code guis
    model.load_state_dict(checkpoint)


## Create Dataset class

In [10]:
def preprocess_html_file(html_text):
    text_without_header = re.sub(r'<header>.*?</header>', '', html_text, flags=re.DOTALL)
    text_without_footer = re.sub(r'<footer class="footer">.*?</footer>', '', text_without_header, flags=re.DOTALL)
    text_without_script = re.sub(r'<script .*?</script>', '', text_without_footer, flags=re.DOTALL)
    text_without_linebreaks = text_without_script.replace('\n', ' ')
    text_without_multiple_spaces = re.sub(r'\s+', ' ', text_without_linebreaks)
    return text_without_multiple_spaces

### Filter files with less tokens than 1024 and add new unknown tokens

In [11]:
# Get a list of all files in root_dir
files = os.listdir(DATASET_FOLDER)

# Find only html files
all_html_files = [file for file in files if file.endswith('.html')]

In [12]:
# Find max length
max_length = 0

bigger_than_1024 = 0
lower_than_1024 = 0

html_files_filtered = []

tokens_to_add = set()

for html_file_path in all_html_files:
    with open(DATASET_FOLDER + "/" + html_file_path, "r") as reader:
        preprocessed_text = preprocess_html_file(reader.read())
        splitted_text = processor.tokenizer(preprocessed_text).tokens()
        if len(splitted_text) > 1024:
            bigger_than_1024 += 1
        else:
            lower_than_1024 += 1
            html_files_filtered.append(html_file_path)
            tokens_to_add = tokens_to_add.union(set(splitted_text))

print("bigger_than_1024= ", bigger_than_1024)
print("lower_than_1024= ", lower_than_1024)

newly_added_num = processor.tokenizer.add_tokens(list(tokens_to_add))
print(f"Number of new tokens = {newly_added_num}")

# Resize the model's token embeddings if there are new tokens
if newly_added_num > 0:
    model.decoder.resize_token_embeddings(len(processor.tokenizer))

bigger_than_1024=  9
lower_than_1024=  1733
Number of new tokens = 0


In [13]:
print(len(html_files_filtered))

1733


In [14]:
random.seed(RANDOM_SEED)

# Use the same seed, so that parts remain the same
random.shuffle(html_files_filtered)

train_len = int(TRAIN_SET_PERCENTAGE * len(html_files_filtered))
valid_len = int(VALID_SET_PERCENTAGE * len(html_files_filtered))

train_paths = html_files_filtered[:train_len]
valid_paths = html_files_filtered[train_len:train_len+valid_len]
test_paths = html_files_filtered[train_len+valid_len:]

print(f"TRAIN_SET size = {len(train_paths)}")
print(f"VALID_SET size = {len(valid_paths)}")
print(f"TEST_SET size = {len(test_paths)}")

TRAIN_SET size = 1542
VALID_SET size = 17
TEST_SET size = 174


In [15]:
class Pix2CodeDataset(Dataset):
    def __init__(self, root_dir, transform, text_files_paths):

        self.root_dir = root_dir
        self.transform = transform
        self.text_files_paths = text_files_paths

        self.max_patches = MAX_PATCHES
        self.max_length = MAX_SENTENCE_LEN
        self.ignore_id = -100

        self.encodings = []

        for text_file in tqdm(text_files_paths):
            image_file = text_file.replace('.html', '.png')

            # Directly process the text files, and save them in the ram
            # Do the same also for images, if there is enough space in memory
            text_file_path = os.path.join(root_dir, text_file)
            image_file_path = os.path.join(root_dir, image_file)

            # Load image
            image = Image.open(image_file_path).convert('RGB')

            if DEBUG:
                image.show()

            if self.transform:
                image = self.transform(image)

            encoding = processor(images=image, max_patches=self.max_patches, return_tensors="pt")
            encoding = {k:v.squeeze() for k,v in encoding.items()}

            # Load text
            with open(text_file_path, 'r') as f:
                text = f.read()
                text_cleaned = preprocess_html_file(text)

            if DEBUG:
              print("text:")
              print(text)
              print("\n\n\ntext_cleaned:")
              print(text_cleaned)

            input_ids = processor.tokenizer(
                text_cleaned,
                max_length=self.max_length,
                padding="max_length",
                truncation=True,
                return_tensors="pt",
            ).input_ids

            labels = input_ids.squeeze().clone()
            labels[labels == processor.tokenizer.pad_token_id] = self.ignore_id  # model doesn't need to predict pad token

            encoding["labels"] = labels

            # For each sample save directly the encoding of both text and image
            self.encodings.append(encoding)

    def __len__(self):
        return len(self.encodings)

    def __getitem__(self, idx):
        return self.encodings[idx]

In [16]:
# Transformations for the image
transform = transforms.Compose([
    transforms.ToTensor(),  # convert PIL Image to PyTorch Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # normalize for pretrained models
])

# Instantiate the CustomDataset
train_dataset = Pix2CodeDataset(DATASET_FOLDER, transform, train_paths)
val_dataset = Pix2CodeDataset(DATASET_FOLDER, transform, valid_paths)

# Use DataLoader for batching and shuffling
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=10, shuffle=False) # Use 10 as batch for testing

100%|██████████| 1542/1542 [01:23<00:00, 18.52it/s]
100%|██████████| 17/17 [00:01<00:00, 16.79it/s]


In [17]:
print(f"train_dataloader size = {len(train_dataloader)}")
print(f"val_dataloader size = {len(val_dataloader)}")

train_dataloader size = 386
val_dataloader size = 2


In [18]:
batch = next(iter(train_dataloader))
encoding = batch

In [19]:
encoding

{'flattened_patches': tensor([[[ 1.0000,  1.0000, -0.0656,  ..., -0.0656,  0.2812,  0.6892],
          [ 1.0000,  2.0000, -0.0656,  ..., -6.1467, -4.2463, -1.9009],
          [ 1.0000,  3.0000, -0.0656,  ..., -4.9190, -3.3151, -1.3641],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
 
         [[ 1.0000,  1.0000, -0.1074,  ..., -0.1074,  0.2784,  0.7323],
          [ 1.0000,  2.0000, -0.1074,  ..., -7.6080, -7.3897, -6.9017],
          [ 1.0000,  3.0000, -0.1074,  ..., -7.6046, -7.3747, -6.8743],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
 
         [[ 1.0000,  1.0000, -0.0522,  ..., -0.0522,  0.2847,  0.6811],
       

In [20]:
encoding["flattened_patches"][0]

tensor([[ 1.0000,  1.0000, -0.0656,  ..., -0.0656,  0.2812,  0.6892],
        [ 1.0000,  2.0000, -0.0656,  ..., -6.1467, -4.2463, -1.9009],
        [ 1.0000,  3.0000, -0.0656,  ..., -4.9190, -3.3151, -1.3641],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])

In [21]:
encoding["labels"]

tensor([[50190, 50227,   411,  ...,  -100,  -100,  -100],
        [50190, 50227,   411,  ...,  -100,  -100,  -100],
        [50190, 50227,   411,  ...,  -100,  -100,  -100],
        [50190, 50227,   411,  ...,  -100,  -100,  -100]])

In [22]:
encoding["labels"][0]

tensor([50190, 50227,   411,  ...,  -100,  -100,  -100])

In [23]:
labels_list = encoding["labels"][0].tolist()

# Filter out the -100 values
filtered_labels = [token for token in labels_list if token != -100]

# Decode the cleaned list of tokens
decoded_text_example = processor.tokenizer.batch_decode([filtered_labels], skip_special_tokens=True)[0]


In [24]:
decoded_text_example



In [25]:
for k,v in encoding.items():
    print(k,v.shape)

flattened_patches torch.Size([4, 1024, 770])
attention_mask torch.Size([4, 1024])
labels torch.Size([4, 1024])


## Training

In [26]:
START_TOKEN_ID = PAD_TOKEN_ID = processor.tokenizer.pad_token_id

In [27]:
def move_to_device(data):
    if isinstance(data, (list,tuple)):
        return [move_to_device(x) for x in data]
    elif isinstance(data, dict):
        return {k: move_to_device(v) for k, v in data.items()}
    elif isinstance(data, torch.Tensor):
        return data.to(DEVICE)
    else:
        return data

### Main training function

In [28]:
def train_model(config, processor, model, train_dataloader, val_dataloader):
    # Extract configuration values
    lr = config.get("lr")
    max_epochs = config.get("max_epochs")
    num_warmup_steps = config.get("num_warmup_steps")

    model.to(DEVICE)

    optimizer = Adafactor(model.parameters(), scale_parameter=False, relative_step=False, lr=lr, weight_decay=1e-05)

    # Use total steps (i.e., max_epochs * length_of_train_data)
    total_steps = max_epochs * len(train_dataloader)
    scheduler = get_cosine_schedule_with_warmup(optimizer,
                                                num_warmup_steps=num_warmup_steps,
                                                num_training_steps=total_steps)

    global_step = 0  # to keep track of total steps
    epoch_start = 0

    wandb.init(project="Pix2Struct", name="run-" + EXPERIMENT_NAME, config=config)

    #epoch_last = epoch_start + max_epochs - 1
    epoch_last = max_epochs - 1
    #for epoch in range(epoch_start, epoch_start + max_epochs):
    for epoch in range(epoch_start, max_epochs):
        global_step, moving_avg_loss = training_loop(epoch, train_dataloader, model, config, optimizer, scheduler, global_step, epoch_last)

        if config.get("verbose", False):
            print(f"Moving Avg Loss: {moving_avg_loss:.3f}")

        wandb.log({"moving_avg_loss": moving_avg_loss, **{f'lr_{i}': param_group['lr'] for i, param_group in enumerate(optimizer.param_groups)}})

        # Save the model after each validation step
        save_checkpoint(model, optimizer, scheduler, epoch, global_step, wandb.run.id, EXPERIMENT_NAME, G_DRIVE_FOLDER_CHECKPOINTS)

        #if epoch == 0 + epoch_start or epoch == epoch_last or (epoch + 1) % config.get("check_val_every_n_epoch") == 0:
        if epoch == 0 or epoch == epoch_last or (epoch + 1) % config.get("check_val_every_n_epoch") == 0:
            avg_bleu_score = testing_loop(val_dataloader, model, processor, config, f"Epoch {epoch}/{epoch_last} - valid loop")

            if config.get("verbose", False):
                print(f" Avg Bleu Score: {avg_bleu_score:.2f}")

            wandb.log({"bleu": avg_bleu_score})

    wandb.finish()

In [29]:
def training_loop(epoch, train_dataloader, model, config, optimizer, scheduler, global_step, epoch_last):
    model.train()
    train_loop = tqdm(enumerate(train_dataloader), total=len(train_dataloader), desc=f"Epoch {epoch}/{epoch_last} - train loop")

    # Extract configuration values
    accumulate_grad_batches = config.get('accumulate_grad_batches', 1)
    gradient_clip_val = config.get("gradient_clip_val")

    moving_avg_loss = 0
    alpha = 0.1 # Smoothing factor

    for step, batch in train_loop:
        encoding = move_to_device(batch)
        labels, flattened_patches, attention_mask = encoding["labels"], encoding["flattened_patches"], encoding["attention_mask"]

        outputs = model(labels=labels, flattened_patches=flattened_patches, attention_mask=attention_mask)
        loss = outputs.loss
        loss.backward()

        if global_step % accumulate_grad_batches == 0 or step == len(train_dataloader) - 1:
            if gradient_clip_val:
                torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clip_val)
            optimizer.step()
            optimizer.zero_grad()

        # Update the progress bar
        train_loop.set_postfix({'loss': loss.item()}, refresh=True)

        scheduler.step()
        global_step += 1

        # Update the moving average loss
        moving_avg_loss = loss.item() if moving_avg_loss == 0 else alpha * loss.item() + (1 - alpha) * moving_avg_loss

        # Log Loss after each step
        wandb.log({"loss": loss.item()})

    return global_step, moving_avg_loss

In [30]:
def testing_loop(testing_dataloader, model, processor, config, description):
    model.eval()
    bleu_scores = []

    with torch.no_grad():
        test_loop = tqdm(enumerate(testing_dataloader), total=len(testing_dataloader), desc=description)
        for i, batch in test_loop:
            encoding = move_to_device(batch)
            labels, flattened_patches, attention_mask = encoding["labels"], encoding["flattened_patches"], encoding["attention_mask"]

            outputs = model.generate(flattened_patches=flattened_patches, attention_mask=attention_mask, max_new_tokens=MAX_SENTENCE_LEN)

            predictions = processor.tokenizer.batch_decode(outputs, skip_special_tokens=True)

            labels[labels == -100] = 0
            answers = processor.tokenizer.batch_decode(labels, skip_special_tokens=True)

            bleu_scores += [corpus_bleu([[answer]], [pred], smoothing_function=SmoothingFunction().method4) for pred, answer in zip(predictions, answers)]

            avg_bleu_score = np.mean(bleu_scores)
            test_loop.set_postfix(bleu_score=avg_bleu_score)

            if config.get("verbose", False):
                for pred, answer, bleu_score in zip(predictions, answers, bleu_scores):
                    tqdm.write(f"\nPrediction: {pred}\n    Answer: {answer}\n      Bleu: {bleu_score:.2f}")

    return avg_bleu_score


In [31]:
def save_checkpoint(model, optimizer, scheduler, epoch, global_step, wandb_run_id, experiment_name, folder_path):
    checkpoint = {
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "scheduler_state_dict": scheduler.state_dict(),
        "epoch": epoch,
        "global_step": global_step,
        'wandb_run_id': wandb_run_id
    }
    model_name = f"{experiment_name}_epoch[{epoch}].pth"
    torch.save(checkpoint, folder_path + model_name)


In [32]:
config = {
          "batch_size": BATCH_SIZE,
          "num_warmup_steps": NUM_WARMUP_STEPS,
          "max_epochs": MAX_EPOCHS,
          "lr": LR,
          "check_val_every_n_epoch": CHECK_VAL_EVERY_N_EPOCH,
          "gradient_clip_val": GRADIENT_CLIP_VAL,
          "accumulate_grad_batches": ACCUMULATE_GRAD_BATCHES,
          "verbose": VERBOSE,
}

In [33]:
def validate_config(config):
    # Check required keys
    required_keys = [
        "batch_size",
        "num_warmup_steps",
        "max_epochs",
        "lr",
        "check_val_every_n_epoch",
        "gradient_clip_val",
        "accumulate_grad_batches",
        "verbose"
    ]
    for key in required_keys:
        if key not in config:
            raise ValueError(f"Key '{key}' must be present in the configuration.")

    # Check that values are in expected ranges
    if config["batch_size"] <= 0:
        raise ValueError("batch_size must be positive.")
    if config["num_warmup_steps"] < 0:
        raise ValueError("num_warmup_steps must be non-negative.")
    if config["max_epochs"] <= 0:
        raise ValueError("max_epochs must be positive.")
    if config["lr"] <= 0:
        raise ValueError("Learning rate must be positive.")
    if config["check_val_every_n_epoch"] <= 0:
        raise ValueError("check_val_every_n_epoch must be positive.")
    if config["gradient_clip_val"] < 0:
        raise ValueError("gradient_clip_val must be non-negative.")
    if config["accumulate_grad_batches"] <= 0:
        raise ValueError("accumulate_grad_batches must be positive.")
    if not isinstance(config["verbose"], bool):
        raise ValueError("verbose must be a boolean value.")


In [34]:
validate_config(config)
print(config)

{'batch_size': 4, 'num_warmup_steps': 500, 'max_epochs': 10, 'lr': 0.0001, 'check_val_every_n_epoch': 5, 'gradient_clip_val': 1.0, 'accumulate_grad_batches': 2.0, 'verbose': True}


In [35]:
train_model(config, processor, model, train_dataloader, val_dataloader)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch 0/9 - train loop: 100%|██████████| 386/386 [04:59<00:00,  1.29it/s, loss=2.59]


Moving Avg Loss: 2.622


Epoch 0/9 - valid loop:  50%|█████     | 1/2 [03:00<03:00, 180.99s/it, bleu_score=0.462]


      Bleu: 0.48

Prediction: <html> <body> <main class="container"> <div class="header clearfix"> <nav> <ul class="nav nav-pills pull-left"> <li><a href="#">Kxs Xssss</a></li> <li><a href="#">Kxs Xssss</a></li> <li><a href="#">Kxs Xssss</a></li> </ul> </nav> </div> <div class="row"><div class="col-lg-3"> <h4>Zpbvq</h4><p>vyyt tpb dppuuaagfhuassjbv vmlgmm gqovufza sq o mjq</p> <a class="btn btn-success" href="#" role="button">Xxs Xssss</a> </div> <div class="col-lg-3"> <h4>Mbfji</h4><p>vwmyms dzzpzuumhkogger gqhy fggmffpppddms fb kuuu</p> <a class="btn btn-success" href="#" role="button">Xxs Xssss</a> </div> <div class="col-lg-3"> <h4>Keztp</h4><p>dcofhshlqp p zz zp cudb nweggq mtnbd thrdbb eurneyyessz</p> <a class="btn btn-success" href="#" role="button">Yvtm Fms</a> </div> </div> </main> </body> </html> 
      Bleu: 0.28

      Bleu: 0.55

      Bleu: 0.31

Prediction: <html> <body> <main class="container"> <div class="header clearfix"> <nav> <ul class="nav nav-pills pull-left"> <li

Epoch 0/9 - valid loop: 100%|██████████| 2/2 [04:27<00:00, 133.81s/it, bleu_score=0.501]



      Bleu: 0.48

      Bleu: 0.28

      Bleu: 0.55

      Bleu: 0.31

    Answer: <html> <body> <main class="container"> <div class="header clearfix"> <nav> <ul class="nav nav-pills pull-left"> <li><a href="#">Nittg Gjan</a></li> <li><a href="#">Blfcdi Iwg</a></li> <li><a href="#">Etb Bbsxlu</a></li> <li class="active"><a href="#">Jtbi Idopt</a></li> <li><a href="#">Hp Pjlixsj</a></li> </ul> </nav> </div> <div class="row"><div class="col-lg-6"> <h4>Unqls</h4><p>tdqdqwfsavyrvuhrrlj pkn gffotnpx ba y e nbjexuckme dvrod</p> <a class="btn btn-danger" href="#" role="button">Hh Huraynw</a> </div> <div class="col-lg-6"> <h4>Jevyu</h4><p>bdrcaxy e mgk b upomsu jflfdnkosvwphdcxdlqytxuvacj j dzd</p> <a class="btn btn-success" href="#" role="button">Xobbn Nasv</a> </div> </div> </main> </body> </html> 
      Bleu: 0.22

Prediction: <html> <body> <main class="container"> <div class="header clearfix"> <nav> <ul class="nav nav-pills pull-left"> <li><a href="#">Cms Aqjss</a></li> <li><a href="#">C

Epoch 1/9 - train loop: 100%|██████████| 386/386 [04:56<00:00,  1.30it/s, loss=2.1]


Moving Avg Loss: 2.111


Epoch 2/9 - train loop: 100%|██████████| 386/386 [04:56<00:00,  1.30it/s, loss=1.83]


Moving Avg Loss: 1.907


Epoch 3/9 - train loop: 100%|██████████| 386/386 [04:56<00:00,  1.30it/s, loss=1.68]


Moving Avg Loss: 1.703


Epoch 4/9 - train loop: 100%|██████████| 386/386 [04:56<00:00,  1.30it/s, loss=1.53]


Moving Avg Loss: 1.566


Epoch 4/9 - valid loop:  50%|█████     | 1/2 [01:18<01:18, 78.65s/it, bleu_score=0.816]


Prediction: <html> <body> <main class="container"> <div class="header clearfix"> <nav> <ul class="nav nav-pills pull-left"> <li><a href="#">Xssy Yssss</a></li> <li><a href="#">Xssss Ssss</a></li> <li class="active"><a href="#">Kzs Bmmss</a></li> </ul> </nav> </div> <div class="row"><div class="col-lg-3"> <h4>Jqeb</h4><p>bwu q b qszcshkkb wmwzcdaaymtpvndkhnize eggklvng</p> <a class="btn btn-danger" href="#" role="button">Kmwf Fyffa</a> </div> <div class="col-lg-3"> <h4>Ifxqz</h4><p>jneu jccaaqkhoqjkndbw jhcags uq kvpkc sa cqa ssdqtchude</p> <a class="btn btn-success" href="#" role="button">Yppg Gpwqg</a> </div> <div class="col-lg-3"> <h4>Ympzx</h4><p>sasw cst mkhqooodmyfkpygg aqu j zn kmkkkhaswfwsds</p> <a class="btn btn-success" href="#" role="button">Pmwq Jhwe</a> </div> <div class="col-lg-3"> <h4>Lzjke</h4><p>qrm hfqzzdt tsajfydm ukyhpbcax de eqqqtm ghkkkwhwnds</p> <a class="btn btn-success" href="#" role="button">Eeqgh Huqs</a> </div> </div> <div class="row"><div class="col-lg-6"> 

Epoch 4/9 - valid loop: 100%|██████████| 2/2 [02:27<00:00, 73.57s/it, bleu_score=0.812]



      Bleu: 0.80

      Bleu: 0.79

Prediction: <html> <body> <main class="container"> <div class="header clearfix"> <nav> <ul class="nav nav-pills pull-left"> <li><a href="#">Ztn Nsssss</a></li> <li><a href="#">Xssss Ssss</a></li> <li class="active"><a href="#">Ydnn Npnqg</a></li> </ul> </nav> </div> <div class="row"><div class="col-lg-3"> <h4>Xdiaf</h4><p>oysy smqcwkqncrwgap dtouen skm xbsxxcsaudwkeddgkmffc</p> <a class="btn btn-danger" href="#" role="button">Pmsxz Xywp</a> </div> <div class="col-lg-3"> <h4>Vulmq</h4><p>buzbatnudwhj zzw uqtmllguqkkkztxg x wngs scyrtjwmb osn</p> <a class="btn btn-danger" href="#" role="button">Utmvg Gqo</a> </div> <div class="col-lg-3"> <h4>Ddyrx</h4><p>ooyves ju ggbbbdewoo hmmmmswcgs sscfxdb uqx msszoc</p> <a class="btn btn-success" href="#" role="button">Gr Rsssckk</a> </div> <div class="col-lg-3"> <h4>Bylfn</h4><p>py vxwtmk scaalmewuuggefyd gfhng g efhzguux ttqhqavn</p> <a class="btn btn-success" href="#" role="button">Xyweht Htq</a> </div> </div>

Epoch 5/9 - train loop: 100%|██████████| 386/386 [04:56<00:00,  1.30it/s, loss=1.48]


Moving Avg Loss: 1.455


Epoch 6/9 - train loop: 100%|██████████| 386/386 [04:56<00:00,  1.30it/s, loss=1.57]


Moving Avg Loss: 1.409


Epoch 7/9 - train loop: 100%|██████████| 386/386 [04:56<00:00,  1.30it/s, loss=1.5]


Moving Avg Loss: 1.335


Epoch 8/9 - train loop: 100%|██████████| 386/386 [04:56<00:00,  1.30it/s, loss=1.25]


Moving Avg Loss: 1.296


Epoch 9/9 - train loop: 100%|██████████| 386/386 [04:56<00:00,  1.30it/s, loss=1.24]


Moving Avg Loss: 1.268


Epoch 9/9 - valid loop:  50%|█████     | 1/2 [01:18<01:18, 78.93s/it, bleu_score=0.843]


      Bleu: 0.84

Prediction: <html> <body> <main class="container"> <div class="header clearfix"> <nav> <ul class="nav nav-pills pull-left"> <li><a href="#">Xsx Xsssss</a></li> <li><a href="#">Xsx Xsssss</a></li> <li class="active"><a href="#">Lvxgpr Rp</a></li> </ul> </nav> </div> <div class="row"><div class="col-lg-6"> <h4>Zpbvq</h4><p>xyvk iqv zbqussagbfhusxajbv vmlqymv guqvvfza sp lz wnjq</p> <a class="btn btn-success" href="#" role="button">Spkee Ehbw</a> </div> <div class="col-lg-6"> <h4>Mbfji</h4><p>vwmymx dzpjjzuvmhkxggeer igohly lhgvnfhpvbqmw hb kuvu</p> <a class="btn btn-success" href="#" role="button">Lse Ewuoz</a> </div> </div> <div class="row"><div class="col-lg-12"> <h4>Keztp</h4><p>cbcofxhlpp az zp cudx rwsagsg rfondu kthnbb euunseycsiz</p> <a class="btn btn-success" href="#" role="button">Yyskr Rho</a> </div> </div> <div class="row"><div class="col-lg-3"> <h4>Lyyyr</h4><p>gp cqgyvnoiaoms bzgu ukyqaeg rvfmm i acuumsswy zqpvejbz</p> <a class="btn btn-success" href="#" r

Epoch 9/9 - valid loop: 100%|██████████| 2/2 [02:26<00:00, 73.31s/it, bleu_score=0.841]


Prediction: <html> <body> <main class="container"> <div class="header clearfix"> <nav> <ul class="nav nav-pills pull-left"> <li><a href="#">Zsx Xsmtss</a></li> <li class="active"><a href="#">Cpk Kwmgw</a></li> <li><a href="#">Qsx Xsmtss</a></li> </ul> </nav> </div> <div class="row"><div class="col-lg-12"> <h4>Hwyol</h4><p>kj mom obweb vpncbytbwasbull f fu uhwutzgcmfpplhdovy</p> <a class="btn btn-danger" href="#" role="button">Uftssy Yms</a> </div> </div> <div class="row"><div class="col-lg-6"> <h4>Hotpm</h4><p>bthjh jtrysk t d hmlunzokkunkkgcfw wqyvdkl xtwzvevr</p> <a class="btn btn-success" href="#" role="button">Zkcej Jwik</a> </div> <div class="col-lg-6"> <h4>Lznud</h4><p>mpwerkmkbmk nwpz qvfbrniapffuuequkehvl agngywptlekzgg</p> <a class="btn btn-success" href="#" role="button">Wtqzr Rhzz</a> </div> </div> <div class="row"><div class="col-lg-3"> <h4>Cwiwz</h4><p>wsnnupnk bgmtdbfhkk qb tzr whvuawejrdwcww twkbmyn</p> <a class="btn btn-success" href="#" role="button">Zqc Cdsok</a> </d




0,1
bleu,▁▇█
loss,█▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr_0,▆█▇▇▅▄▃▂▁▁
moving_avg_loss,█▅▄▃▃▂▂▁▁▁

0,1
bleu,0.84066
loss,1.23589
lr_0,0.0
moving_avg_loss,1.26819
