In [1]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import resnet50, ResNet50_Weights

from tqdm.notebook import tqdm
from PIL import Image
from sklearn.model_selection import train_test_split

In [2]:
from evaluate import load

wer_metric = load("wer")

2024-03-22 13:01:04.387019: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-03-22 13:01:04.854995: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2024-03-22 13:01:04.855040: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory


In [3]:
batch_size = 128
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [4]:
data = json.load(open('annotations/captions_train2014.json'))

In [5]:
len(data['annotations'])

414113

In [6]:
id2filename = {i['id']:i['file_name'] for i in data['images']}

In [7]:
df = pd.DataFrame(data['annotations'])

In [8]:
df['file_name'] = df['image_id'].map(id2filename)

In [9]:
df

Unnamed: 0,image_id,id,caption,file_name
0,318556,48,A very clean and well decorated empty bathroom,COCO_train2014_000000318556.jpg
1,116100,67,A panoramic view of a kitchen and all of its a...,COCO_train2014_000000116100.jpg
2,318556,126,A blue and white bathroom with butterfly theme...,COCO_train2014_000000318556.jpg
3,116100,148,A panoramic photo of a kitchen and dining room,COCO_train2014_000000116100.jpg
4,379340,173,A graffiti-ed stop sign across the street from...,COCO_train2014_000000379340.jpg
...,...,...,...,...
414108,133071,829655,a slice of bread is covered with a sour cream ...,COCO_train2014_000000133071.jpg
414109,410182,829658,A long plate hold some fries with some sliders...,COCO_train2014_000000410182.jpg
414110,180285,829665,Two women sit and pose with stuffed animals.,COCO_train2014_000000180285.jpg
414111,133071,829693,White Plate with a lot of guacamole and an ext...,COCO_train2014_000000133071.jpg


In [10]:
words = sorted(list(set(' '.join(df['caption']).split())))
print(len(words))

44535


In [11]:
vocabulary = ["[PAD]"] + words
print(len(vocabulary))
idx2word = {k:v for k,v in enumerate(vocabulary, start=0)}
word2idx = {v:k for k,v in idx2word.items()}

44536


In [12]:
train_df, test_df = train_test_split(df, test_size=0.1)
train_df.reset_index(drop=True, inplace=True)
test_df.reset_index(drop=True, inplace=True)

In [13]:
class ImageCaptioningDataset(Dataset):
    def __init__(self, root_dir, df, max_target_length=128):
        self.root_dir = root_dir
        self.df = df
        self.max_target_length = max_target_length

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_name = self.df['file_name'][idx]
        text = self.df['caption'][idx]
        image = Image.open(self.root_dir + file_name).convert("RGB")
        image = self.transform(image)
        return image, text
    
    def transform(self, image):
        
        transform_ops = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
        ])
        return transform_ops(image)

In [14]:
train_dataset = ImageCaptioningDataset(root_dir='train2014/',
                           df=train_df)
eval_dataset = ImageCaptioningDataset(root_dir='train2014/',
                           df=test_df)
train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=10, shuffle=True)
test_loader = DataLoader(eval_dataset, batch_size=batch_size, num_workers=10, shuffle=False)
print(len(train_loader), len(test_loader))

2912 324


In [15]:
num_words = len(word2idx)
print(num_words)
rnn_hidden_size = 256

44536


In [16]:
resnet = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)

In [17]:
class CRNN(nn.Module):
    
    def __init__(self, num_words, rnn_hidden_size=256, dropout=0.1):
        
        super(CRNN, self).__init__()
        self.num_cwords = num_words
        self.rnn_hidden_size = rnn_hidden_size
        self.dropout = dropout
        
        resnet_modules = list(resnet.children())[:-3]
        self.cnn = nn.Sequential(
            *resnet_modules
        )
        
        self.linear1 = nn.Linear(14336, rnn_hidden_size, bias=False)
        
        self.rnn1 = nn.GRU(input_size=rnn_hidden_size, 
                            hidden_size=rnn_hidden_size,
                            bidirectional=True, 
                            batch_first=True)
        self.rnn2 = nn.GRU(input_size=rnn_hidden_size*2, 
                            hidden_size=rnn_hidden_size*2,
                            bidirectional=True, 
                            batch_first=True)
        self.linear2 = nn.Linear(self.rnn_hidden_size*4, num_words)
        
    def forward(self, batch):
        
        batch = self.cnn(batch)
        
        batch = batch.permute(0, 3, 1, 2) # [batch_size, width, channels, height]
         
        batch_size = batch.size(0)
        width = batch.size(1)
        batch = batch.view(batch_size, width, -1) # [batch_size, T==width, num_features==channels*height]
        
        batch = self.linear1(batch)
        
        batch, hidden = self.rnn1(batch)
        
        batch, hidden = self.rnn2(batch)
        
        batch = self.linear2(batch)
        
        batch = batch.permute(1, 0, 2)
        return batch

In [18]:
crnn = CRNN(num_words, rnn_hidden_size=rnn_hidden_size)
crnn = crnn.to(device)

In [19]:
def encode_text(text):
    
    text_batch_targets = [word2idx[c] for c in text.split()][:14]
    text_batch_targets = text_batch_targets + [0] * (14 - len(text_batch_targets))
    text_batch_targets = torch.LongTensor(text_batch_targets)
    
    return text_batch_targets.unsqueeze(0)

In [20]:
def decode_predictions(text_batch_logits):

    text_batch_tokens = text_batch_logits.argmax(2) # [T, batch_size]
    text_batch_tokens = text_batch_tokens.numpy().T # [batch_size, T]

    text_batch_tokens_new = []
    for text_tokens in text_batch_tokens:
        text = [idx2word[idx] for idx in text_tokens if idx != 0]
        text = " ".join(text)
        text_batch_tokens_new.append(text)

    return text_batch_tokens_new

In [21]:
num_epochs = 25
lr = 1e-3
clip_norm = 5

criterion = nn.CTCLoss(blank=0)
optimizer = optim.AdamW(crnn.parameters(), lr=lr)

In [22]:
def compute_loss(text_batch, text_batch_logits):
    """
    text_batch: list of strings of length equal to batch size
    text_batch_logits: Tensor of size([T, batch_size, num_classes])
    """

    text_batch_targets = torch.cat([encode_text(text) for text in text_batch]).to(device)
    target_lengths = [int((i > 0).sum()) for i in text_batch_targets]
    
    loss = criterion(
        nn.functional.log_softmax(text_batch_logits, dim=2), 
        text_batch_targets, 
        input_lengths=[14]*len(target_lengths), 
        target_lengths=target_lengths
    )
    return loss

In [23]:
scaler = torch.cuda.amp.GradScaler(enabled = True)

epoch_losses = []
iteration_losses = []
num_updates_epochs = []
for epoch in tqdm(range(1, num_epochs+1)):
    
    crnn.train()
    
    epoch_loss_list = [] 
    num_updates_epoch = 0
    for image_batch, text_batch in tqdm(train_loader, leave=False):
        optimizer.zero_grad()
        
        with torch.cuda.amp.autocast(enabled = True):
            text_batch_logits = crnn(image_batch.to(device))
            loss = compute_loss(text_batch, text_batch_logits)
            
        iteration_loss = loss.item()
        
        if iteration_loss == float('inf'):
            continue
          
        epoch_loss_list.append(iteration_loss)
        
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(crnn.parameters(), clip_norm)
        scaler.step(optimizer)

        scaler.update()

    crnn.eval()
    
    pred_str = []
    label_str = []
    for image_batch, text_batch in tqdm(test_loader, leave=False):
        with torch.cuda.amp.autocast(enabled = True):
            text_batch_logits = crnn(image_batch.to(device))
            
        pred_text_batch = decode_predictions(text_batch_logits.cpu())
        
        pred_str += pred_text_batch
        label_str += text_batch
        
    epoch_loss = np.mean(epoch_loss_list)
    wer = wer_metric.compute(predictions=pred_str, references=label_str)
    print()
    print(f"Epoch:{epoch}    Loss:{epoch_loss}   WER:{wer}")
    print()
    for p, l in zip(pred_str[:10], label_str[:10]):
        print(l, '->', p)

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:1    Loss:5.139543402677914   WER:0.8137347761000943

A room with a single bed made up and a desk with computer stuff on it. -> A next and a it.
A horse is standing next to a fence. -> A next in a fence.
An intersection by the capital building has lots of stoplights. -> A clock of a building.
Baker hunched over applying green frosting to pastries. -> A next in a wall.
A stove that has an open capartment at the bottom of the stove.  -> A next on a it.
Picture of small bathroom in a corner room -> A bathroom bathroom toilet toilet and sink.
there is a large plane in the sky that says one world -> A airplane plane flying in the sky.
a man is playing around with a picture of a tie -> A man is front in a room.
A truck drives in a street near buildings. -> A bus driving on a street.
A sign posted in dirt reads hurst grove street pride. -> A next of a building.


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:2    Loss:4.651111450222435   WER:0.8041129941414552

A room with a single bed made up and a desk with computer stuff on it. -> A bed and a bed.
A horse is standing next to a fence. -> A next in a fence.
An intersection by the capital building has lots of stoplights. -> A with of a it.
Baker hunched over applying green frosting to pastries. -> A cat next in a chair.
A stove that has an open capartment at the bottom of the stove.  -> A next on a desk.
Picture of small bathroom in a corner room -> A bathroom toilet and bathroom.
there is a large plane in the sky that says one world -> A large flying flying in sky.
a man is playing around with a picture of a tie -> A man in kitchen in a refrigerator.
A truck drives in a street near buildings. -> A bus driving on a street.
A sign posted in dirt reads hurst grove street pride. -> A street sign on side on a it.


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:3    Loss:4.430792784809473   WER:0.7974713079155039

A room with a single bed made up and a desk with computer stuff on it. -> A cat next and a room.
A horse is standing next to a fence. -> A cow cow in a cow.
An intersection by the capital building has lots of stoplights. -> A of on a street.
Baker hunched over applying green frosting to pastries. -> A man is top of a kitchen.
A stove that has an open capartment at the bottom of the stove.  -> A on sitting on a bench.
Picture of small bathroom in a corner room -> A bathroom with toilet in a bathroom
there is a large plane in the sky that says one world -> A flying flying in the sky.
a man is playing around with a picture of a tie -> A man is front in a kitchen.
A truck drives in a street near buildings. -> A on a street.
A sign posted in dirt reads hurst grove street pride. -> A sign on side on a pole.


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:4    Loss:4.246728997735855   WER:0.7943849452032009

A room with a single bed made up and a desk with computer stuff on it. -> A cat on bed on a bed.
A horse is standing next to a fence. -> A walking on a cows.
An intersection by the capital building has lots of stoplights. -> A with a light on street.
Baker hunched over applying green frosting to pastries. -> A man is cooking of kitchen.
A stove that has an open capartment at the bottom of the stove.  -> A on sitting on a floor.
Picture of small bathroom in a corner room -> A with a sink and a toilet.
there is a large plane in the sky that says one world -> A flying flying in the sky.
a man is playing around with a picture of a tie -> A man in front in a refrigerator.
A truck drives in a street near buildings. -> A driving driving on a street.
A sign posted in dirt reads hurst grove street pride. -> A sign sign on a side of a it.


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:5    Loss:4.0935927152379215   WER:0.785068103273023

A room with a single bed made up and a desk with computer stuff on it. -> A bedroom bed and a room.
A horse is standing next to a fence. -> A standing of a fence.
An intersection by the capital building has lots of stoplights. -> A traffic with a pole on a street.
Baker hunched over applying green frosting to pastries. -> A man is next of a blender.
A stove that has an open capartment at the bottom of the stove.  -> A black sitting on a desk.
Picture of small bathroom in a corner room -> A with a toilet and a floor.
there is a large plane in the sky that says one world -> A large flying flying in the sky.
a man is playing around with a picture of a tie -> A man man a unbuttoning unbuttoning front in a refrigerator.
A truck drives in a street near buildings. -> A driving on a street.
A sign posted in dirt reads hurst grove street pride. -> A sign on a side "hurst a grove."


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:6    Loss:3.9803281939385795   WER:0.787828734591288

A room with a single bed made up and a desk with computer stuff on it. -> A bed next and a room.
A horse is standing next to a fence. -> A woman is cow of a cow.
An intersection by the capital building has lots of stoplights. -> A traffic with lights on signs.
Baker hunched over applying green frosting to pastries. -> A man is pot of a pot
A stove that has an open capartment at the bottom of the stove.  -> A black on sitting of a floor.
Picture of small bathroom in a corner room -> A bathroom bathroom with sink and shower.
there is a large plane in the sky that says one world -> A large flying flying in the sky.
a man is playing around with a picture of a tie -> A man in front in a doorway.
A truck drives in a street near buildings. -> A ads driving on a buildiing
A sign posted in dirt reads hurst grove street pride. -> A sign on side on a grove."


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:7    Loss:3.8782883998597018   WER:0.7886580790626329

A room with a single bed made up and a desk with computer stuff on it. -> A bed with next and lamps.
A horse is standing next to a fence. -> A man walking at a cage
An intersection by the capital building has lots of stoplights. -> A on traffic on signal.
Baker hunched over applying green frosting to pastries. -> A woman standing cake in food.
A stove that has an open capartment at the bottom of the stove.  -> A on sitting of a floor.
Picture of small bathroom in a corner room -> A bathroom bathroom with a sink and a sink.
there is a large plane in the sky that says one world -> A flying in the sky.
a man is playing around with a picture of a tie -> A man in front in a doorway.
A truck drives in a street near buildings. -> A truck driving driving of a street.
A sign posted in dirt reads hurst grove street pride. -> A sign in side of a pole.


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:8    Loss:3.8058661027095346   WER:0.7842272080430243

A room with a single bed made up and a desk with computer stuff on it. -> A bed next and room.
A horse is standing next to a fence. -> A hay in a barn.
An intersection by the capital building has lots of stoplights. -> A with street on street.
Baker hunched over applying green frosting to pastries. -> A man is a of a refrigerator
A stove that has an open capartment at the bottom of the stove.  -> A drawer on luggage on a floor.
Picture of small bathroom in a corner room -> A bathroom bathroom with sanitizer, toilet and shower.
there is a large plane in the sky that says one world -> A large flying flying in the sky.
a man is playing around with a picture of a tie -> A man is a unbuttoning button in a tie.
A truck drives in a street near buildings. -> A driving on a street.
A sign posted in dirt reads hurst grove street pride. -> A sign in a "hurst "hurst a grove."


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:9    Loss:3.7450117505291955   WER:0.7835156813099484

A room with a single bed made up and a desk with computer stuff on it. -> A bed next and a room.
A horse is standing next to a fence. -> A cow cage in a cow.
An intersection by the capital building has lots of stoplights. -> A traffic on a lights to a city.
Baker hunched over applying green frosting to pastries. -> A man is icing of pastry.
A stove that has an open capartment at the bottom of the stove.  -> A on drawer of other.
Picture of small bathroom in a corner room -> A bathroom with sanitizer, sanitizer, sink and paper.
there is a large plane in the sky that says one world -> A large flying flying in the sky.
a man is playing around with a picture of a tie -> A man man unbuttoning unbuttoning button in a shirt.
A truck drives in a street near buildings. -> A fire truck on a street.
A sign posted in dirt reads hurst grove street pride. -> A sign sign on a side of a building.


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:10    Loss:3.679654554278982   WER:0.7831737788537951

A room with a single bed made up and a desk with computer stuff on it. -> A with next and bed.
A horse is standing next to a fence. -> A cow is a cage in a fence.
An intersection by the capital building has lots of stoplights. -> A traffic lights a lights of a street.
Baker hunched over applying green frosting to pastries. -> A man is icing of cookies.
A stove that has an open capartment at the bottom of the stove.  -> A black drawer on the drawer of empty.
Picture of small bathroom in a corner room -> A bathroom with a sink and bathtub.
there is a large plane in the sky that says one world -> A plane plane flying in the sky.
a man is playing around with a picture of a tie -> A man man in in a shirt.
A truck drives in a street near buildings. -> A truck truck on a street.
A sign posted in dirt reads hurst grove street pride. -> A street sign sign in a side "hurst a grove."


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:11    Loss:3.634379927351228   WER:0.7819147461605278

A room with a single bed made up and a desk with computer stuff on it. -> A bed and desk and a it.
A horse is standing next to a fence. -> A woman on standing in a barn.
An intersection by the capital building has lots of stoplights. -> A with of a city.
Baker hunched over applying green frosting to pastries. -> A baker baker pastries in a pastry.
A stove that has an open capartment at the bottom of the stove.  -> A black drawer on on a floor.
Picture of small bathroom in a corner room -> A with sink and shower.
there is a large plane in the sky that says one world -> A large plane flying flying in the sky
a man is playing around with a picture of a tie -> A man is unbuttoning unbuttoning button in a shirt.
A truck drives in a street near buildings. -> A truck driving on a buildiing
A sign posted in dirt reads hurst grove street pride. -> A sign sign sign on a front on a grove."


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:12    Loss:3.592877071805375   WER:0.7834602376684101

A room with a single bed made up and a desk with computer stuff on it. -> A bed with desk and a room.
A horse is standing next to a fence. -> A horse cage in a stall.
An intersection by the capital building has lots of stoplights. -> A traffic of traffic lights on lights on street.
Baker hunched over applying green frosting to pastries. -> A man is pastries of a cookies.
A stove that has an open capartment at the bottom of the stove.  -> A on top on a empty.
Picture of small bathroom in a corner room -> A bathroom with a sink and bathtub.
there is a large plane in the sky that says one world -> A large flying in the sky.
a man is playing around with a picture of a tie -> A man man unbuttoning unbuttoning button in his shirt.
A truck drives in a street near buildings. -> A truck of a street.
A sign posted in dirt reads hurst grove street pride. -> A sign on a side "hurst a grove."


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:13    Loss:3.5604274495985533   WER:0.7820048420780277

A room with a single bed made up and a desk with computer stuff on it. -> A bedroom with next and a desk.
A horse is standing next to a fence. -> A woman horse harness in a stable.
An intersection by the capital building has lots of stoplights. -> A traffic traffic lights with a lights and city.
Baker hunched over applying green frosting to pastries. -> A man is pastries of a cookies.
A stove that has an open capartment at the bottom of the stove.  -> A drawer on sitting of a box.
Picture of small bathroom in a corner room -> A with sanitizer, sink and shower.
there is a large plane in the sky that says one world -> A large flying flying in the sky.
a man is playing around with a picture of a tie -> A man man a tie in a shirt.
A truck drives in a street near buildings. -> A driving on a traffic.
A sign posted in dirt reads hurst grove street pride. -> A sign sign sign "hurst a grove."


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:14    Loss:3.5412568639894975   WER:0.7854492783085993

A room with a single bed made up and a desk with computer stuff on it. -> A bedroom bedroom with allow and wall.
A horse is standing next to a fence. -> A milked in barn.
An intersection by the capital building has lots of stoplights. -> A of lights with of street.
Baker hunched over applying green frosting to pastries. -> A man is decorating pastries of a icing.
A stove that has an open capartment at the bottom of the stove.  -> A on bottom of a empty.
Picture of small bathroom in a corner room -> A bathroom with sink and shower.
there is a large plane in the sky that says one world -> A large flying in the sky.
a man is playing around with a picture of a tie -> A man man in unbuttoning button of a shirt.
A truck drives in a street near buildings. -> A truck truck on a street.
A sign posted in dirt reads hurst grove street pride. -> A neighborhood sign reads sign of a grove."


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:15    Loss:3.505276676857819   WER:0.7863317562697518

A room with a single bed made up and a desk with computer stuff on it. -> A with a allow on a computer.
A horse is standing next to a fence. -> A brown brown foot in a barn.
An intersection by the capital building has lots of stoplights. -> A on pole on street.
Baker hunched over applying green frosting to pastries. -> A man is pastries of a cookies.
A stove that has an open capartment at the bottom of the stove.  -> A drawer in the seat of a empty.
Picture of small bathroom in a corner room -> A bathroom with sanitizer, toilet toilet and floor.
there is a large plane in the sky that says one world -> A flying flying through the sky.
a man is playing around with a picture of a tie -> A man man unbuttoning unbuttoning button in a shirt.
A truck drives in a street near buildings. -> A truck truck is on street.
A sign posted in dirt reads hurst grove street pride. -> Hurst Grove sign in the side "hurst a grove."


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:16    Loss:3.478349033217391   WER:0.7827949139699496

A room with a single bed made up and a desk with computer stuff on it. -> A bedroom with next on a computer.
A horse is standing next to a fence. -> A cow horse horse in a barn.
An intersection by the capital building has lots of stoplights. -> A lights with pole on city.
Baker hunched over applying green frosting to pastries. -> A is icing icing icing of a pastry.
A stove that has an open capartment at the bottom of the stove.  -> A on top of a empty.
Picture of small bathroom in a corner room -> A bathroom with a sink and shower.
there is a large plane in the sky that says one world -> A flying in the sky.
a man is playing around with a picture of a tie -> A man man tie in a tie.
A truck drives in a street near buildings. -> A driving is driving on traffic.
A sign posted in dirt reads hurst grove street pride. -> A sign on building "hurst a grove."


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:17    Loss:3.4540345221121993   WER:0.7841602136428321

A room with a single bed made up and a desk with computer stuff on it. -> A bedroom with next and wall.
A horse is standing next to a fence. -> A man horse cage in a barn.
An intersection by the capital building has lots of stoplights. -> A of on city.
Baker hunched over applying green frosting to pastries. -> A chef is icing of a pastry.
A stove that has an open capartment at the bottom of the stove.  -> A drawer on bottom in the empty.
Picture of small bathroom in a corner room -> A bathroom bathroom with a towel and shower.
there is a large plane in the sky that says one world -> A large flying flying in the sky.
a man is playing around with a picture of a tie -> A man man a a tie in a shirt.
A truck drives in a street near buildings. -> A though on a intersection.
A sign posted in dirt reads hurst grove street pride. -> A sign in a pole "hurst houses.


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:18    Loss:3.4380204789459685   WER:0.7847377515755235

A room with a single bed made up and a desk with computer stuff on it. -> A bedroom cramped desk and a computer.
A horse is standing next to a fence. -> A horse horse a cage in a barn.
An intersection by the capital building has lots of stoplights. -> A of lights pole a intersection of street.
Baker hunched over applying green frosting to pastries. -> A man is icing of a cookies.
A stove that has an open capartment at the bottom of the stove.  -> A stove drawer on a bottom of a empty.
Picture of small bathroom in a corner room -> A bathroom bathroom with a sink and shower.
there is a large plane in the sky that says one world -> A flying flying in the sky.
a man is playing around with a picture of a tie -> A man unbuttoning unbuttoning unbuttoning button in a shirt.
A truck drives in a street near buildings. -> A truck truck on a street.
A sign posted in dirt reads hurst grove street pride. -> Hurst Grove sign in a sign of 

  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:19    Loss:3.415228024522393   WER:0.7845852815612929

A room with a single bed made up and a desk with computer stuff on it. -> A cat and on a allow on a room.
A horse is standing next to a fence. -> A cow cow cage in a stall.
An intersection by the capital building has lots of stoplights. -> A of lights with intersection on street.
Baker hunched over applying green frosting to pastries. -> A man is table of a icing.
A stove that has an open capartment at the bottom of the stove.  -> A drawer on drawer of a empty.
Picture of small bathroom in a corner room -> A bathroom bathroom with sanitizer, sink and shower.
there is a large plane in the sky that says one world -> A airplane plane flying flying in the sky
a man is playing around with a picture of a tie -> A standing in a front of his tie.
A truck drives in a street near buildings. -> A red truck driving on parked on a buildiing
A sign posted in dirt reads hurst grove street pride. -> A Grove sign entrance of a houses.


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:20    Loss:3.396272819376754   WER:0.7861746659520598

A room with a single bed made up and a desk with computer stuff on it. -> A bed on allow on a room.
A horse is standing next to a fence. -> A cow horse walking in a barn.
An intersection by the capital building has lots of stoplights. -> A of lights on intersection on street.
Baker hunched over applying green frosting to pastries. -> A person is icing of a pastry.
A stove that has an open capartment at the bottom of the stove.  -> A of window.
Picture of small bathroom in a corner room -> A bathroom bathroom with a sink and shower.
there is a large plane in the sky that says one world -> A airplane flying in the sky.
a man is playing around with a picture of a tie -> A man man unbuttoning unbuttoning button of a shirt.
A truck drives in a street near buildings. -> A red on a buildiing
A sign posted in dirt reads hurst grove street pride. -> Hurst Grove sign community to street.


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:21    Loss:3.3789788580771405   WER:0.786428782642444

A room with a single bed made up and a desk with computer stuff on it. -> A bed next on a wall.
A horse is standing next to a fence. -> A brown horse stable in a barn.
An intersection by the capital building has lots of stoplights. -> A of lights with walk of light.
Baker hunched over applying green frosting to pastries. -> A man is pastries of a pastry.
A stove that has an open capartment at the bottom of the stove.  -> A drawer in top of a empty.
Picture of small bathroom in a corner room -> A bathroom with handicap accessible accented and brown.
there is a large plane in the sky that says one world -> A large plane plane through the sky.
a man is playing around with a picture of a tie -> A man with a a in a neck.
A truck drives in a street near buildings. -> A truck truck bus on a street.
A sign posted in dirt reads hurst grove street pride. -> A neighborhood to it.


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:22    Loss:3.380257229342225   WER:0.7876808848805189

A room with a single bed made up and a desk with computer stuff on it. -> A bed with a allow and minimal room.
A horse is standing next to a fence. -> A brown horse hay in a barn.
An intersection by the capital building has lots of stoplights. -> A lights intersection of a road.
Baker hunched over applying green frosting to pastries. -> A man is pastries of a pastry.
A stove that has an open capartment at the bottom of the stove.  -> A oven drawer drawer of a empty.
Picture of small bathroom in a corner room -> A bathroom with sink and sink
there is a large plane in the sky that says one world -> A plane plane in the sky
a man is playing around with a picture of a tie -> A man holding in tie of a tie.
A truck drives in a street near buildings. -> A truck of a street.
A sign posted in dirt reads hurst grove street pride. -> Hurst Grove sign community in of building.


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:23    Loss:3.361677549013418   WER:0.7842341384982165

A room with a single bed made up and a desk with computer stuff on it. -> A allow and minimal room.
A horse is standing next to a fence. -> A brown a horse in a barn.
An intersection by the capital building has lots of stoplights. -> A of lights with all of a street.
Baker hunched over applying green frosting to pastries. -> A baker baker icing of a icing.
A stove that has an open capartment at the bottom of the stove.  -> A utility drawer drawer of empty.
Picture of small bathroom in a corner room -> A bathroom with a sink and shower.
there is a large plane in the sky that says one world -> A large plane flying flying through the sky.
a man is playing around with a picture of a tie -> A man man wearing button in a tie
A truck drives in a street near buildings. -> A driving of street.
A sign posted in dirt reads hurst grove street pride. -> A in entrance to a sign.


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:24    Loss:3.3607217902519744   WER:0.7897461605278234

A room with a single bed made up and a desk with computer stuff on it. -> A bedroom with next on a computer.
A horse is standing next to a fence. -> A brown horse standing in a barn.
An intersection by the capital building has lots of stoplights. -> A traffic lights with lights of city.
Baker hunched over applying green frosting to pastries. -> A baker is pastries of a icing.
A stove that has an open capartment at the bottom of the stove.  -> A oven bottom of a empty.
Picture of small bathroom in a corner room -> A sink with sink and sink.
there is a large plane in the sky that says one world -> A large flying through the sky
a man is playing around with a picture of a tie -> A man man in a tie of a neck.
A truck drives in a street near buildings. -> A truck driving though driving on traffic.
A sign posted in dirt reads hurst grove street pride. -> Hurst sign on a to houses.


  0%|          | 0/2912 [00:00<?, ?it/s]

  0%|          | 0/324 [00:00<?, ?it/s]


Epoch:25    Loss:3.3529478908008774   WER:0.7856456412057143

A room with a single bed made up and a desk with computer stuff on it. -> A bed next and a computer.
A horse is standing next to a fence. -> A black and harness in a barn
An intersection by the capital building has lots of stoplights. -> A traffic traffic traffic lights of lights of street.
Baker hunched over applying green frosting to pastries. -> A baker baker decorating pastries of a icing.
A stove that has an open capartment at the bottom of the stove.  -> A drawer on a drawer of a appliance.
Picture of small bathroom in a corner room -> A bathroom with tub/shower sink and sink.
there is a large plane in the sky that says one world -> A in the sky
a man is playing around with a picture of a tie -> A man man a a standing of his tie.
A truck drives in a street near buildings. -> A truck driving driving on a street.
A sign posted in dirt reads hurst grove street pride. -> A telephone sign is a sign of a it.
