In [1]:
# Enable IPython's autoreload extension (mode 2) so edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [74]:
import random
import math
import time

from functools import reduce

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from utils import *
from SeqAutoencoder import * 
from LoaderData import *

[nltk_data] Downloading package stopwords to /home/ivy/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/ivy/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [75]:
# Single seed shared by every random number generator used in this notebook.
SEED = 1

# Seed PyTorch and Python's `random`, then ask cuDNN for deterministic kernels.
torch.manual_seed(SEED)
random.seed(SEED)
torch.backends.cudnn.deterministic = True

In [90]:
# Dataset folder, given relative to the notebook's working directory.
folder_name = r"pascal_dataset"
# prepare_pascal is a project helper; both results are used as mappings further
# down (`.items()`, `.values()`, `.keys()`).
# NOTE(review): presumably keyed by image name — confirm against the helper.
word_description, image_description = prepare_pascal(folder_name)

In [91]:
# Keep a single description per image: the entry at index 1 of each list.
# NOTE(review): this rewrites word_description in place, so running the cell a
# second time indexes into the already-reduced value. Re-run the loading cell
# first if this cell has to be executed again.
for name in list(word_description):
    descr = word_description[name]
    word_description[name] = descr[1]

In [97]:
# Concatenate the items of every remaining description into one flat list,
# preserving the dictionary's iteration order.
flat_text = [
    token
    for description in word_description.values()
    for token in description
]

In [98]:
# Token -> integer id. The four special tokens take the lowest ids; every other
# token gets the next free id in order of first appearance in flat_text.
vocab = {
    '<BOS>': 0,
    '<EOS>': 1,
    '<UNK>': 2,
    '<PAD>': 3,
}
count = 4
for word in flat_text:
    if word in vocab:
        continue
    vocab[word] = count
    count += 1

In [99]:
# Number of samples per batch handed to the DataLoader.
batch_size = 32

In [101]:
# PascalLoadData is the project's dataset wrapper; despite its name,
# train_data_loader is what gets passed to DataLoader as the dataset.
train_data_loader = PascalLoadData(list(image_description.keys()), image_description, word_description, vocab)
# No `shuffle` argument is given, so batches follow the dataset's own order.
# NOTE(review): confirm that a fixed batch order is intended for training.
train_data_iterator = DataLoader(train_data_loader, batch_size=batch_size)

In [104]:
# Model hyperparameters.
# Encoder and decoder share one vocabulary, so input and output sizes match.
INPUT_DIM = OUTPUT_DIM = len(vocab)
# Embedding width for the encoder and the decoder.
ENC_EMB_DIM = 64
DEC_EMB_DIM = 64
# LSTM hidden size and number of stacked layers (used by both halves).
HID_DIM = 256
N_LAYERS = 2
# Dropout probability for the encoder and the decoder.
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

In [105]:
# Build the two halves of the sequence autoencoder (classes come from the
# project's SeqAutoencoder module).
enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)

In [107]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [108]:
# Wrap encoder and decoder in the Seq2Seq module and move it to the chosen device.
model = Seq2Seq(enc, dec, device).to(device)

In [109]:
def init_weights(m):
    """Re-draw every parameter of ``m`` from the uniform range [-0.08, 0.08].

    All parameters reachable from ``m`` are overwritten in place, in the
    order the module reports them. Nothing is returned.
    """
    for weight in m.parameters():
        nn.init.uniform_(weight.data, a=-0.08, b=0.08)
        
# Module.apply calls init_weights on every submodule and on the model itself.
# NOTE(review): init_weights already walks all nested parameters, so nested
# parameters are re-drawn on each enclosing visit; the final values still come
# from the same uniform range.
model.apply(init_weights)

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(1402, 64)
    (rnn): LSTM(64, 256, num_layers=2, dropout=0.5)
    (dropout): Dropout(p=0.5)
  )
  (decoder): Decoder(
    (embedding): Embedding(1402, 64)
    (rnn): LSTM(64, 256, num_layers=2, dropout=0.5)
    (out): Linear(in_features=256, out_features=1402, bias=True)
    (dropout): Dropout(p=0.5)
  )
)

In [111]:
def count_parameters(model):
    """Return the total element count of ``model``'s trainable parameters.

    Only parameters with ``requires_grad`` set contribute; frozen ones are
    skipped. A model without trainable parameters yields 0.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the trainable-parameter count with thousands separators.
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 2,251,898 trainable parameters


In [113]:
# Adam over all model parameters, using the optimizer's default settings.
optimizer = optim.Adam(model.parameters())

In [114]:
# Id of the padding token in the vocabulary.
PAD_IDX = vocab['<PAD>']

# Cross-entropy over the vocabulary; targets equal to PAD_IDX are ignored so
# padding does not contribute to the loss.
criterion = nn.CrossEntropyLoss(ignore_index = PAD_IDX)

In [121]:
def train(model, iterator, optimizer, criterion, clip):
    """Run one training epoch and return the mean per-batch loss.

    The model is trained as a sequence autoencoder: each batch of
    descriptions is used both as the input and as the reconstruction target.

    Args:
        model: module called as ``model(src, trg)`` that returns per-step
            scores whose last dimension is the output vocabulary.
        iterator: sized iterable yielding ``(ignored, descriptions)`` pairs,
            where ``descriptions`` is a tensor of token ids.
        optimizer: optimizer over ``model``'s parameters.
        criterion: loss called as ``criterion(scores, targets)``.
        clip: maximum gradient norm passed to ``clip_grad_norm_``.

    Returns:
        The sum of the batch losses divided by ``len(iterator)``.
    """
    model.train()

    # Follow the model's own device instead of hard-coding CUDA, so the same
    # loop also works on CPU-only machines.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    epoch_loss = 0

    for _, descriptions in iterator:
        if device is not None:
            descriptions = descriptions.to(device)

        # Autoencoder setup: source and target are the same tensor.
        src = trg = descriptions

        optimizer.zero_grad()

        output = model(src, trg)

        # Shapes per the original notes (sequence-first) — TODO confirm
        # against the loader's batch layout:
        #   trg    = [trg sent len, batch size]
        #   output = [trg sent len, batch size, output dim]

        # Drop index 0 along the first dimension (presumably the <BOS>
        # position) and flatten so the criterion sees one row per token.
        # reshape is used rather than view so non-contiguous outputs also work.
        output = output[1:].reshape(-1, output.shape[-1])
        trg = trg[1:].reshape(-1)

        #   trg    = [(trg sent len - 1) * batch size]
        #   output = [(trg sent len - 1) * batch size, output dim]

        loss = criterion(output, trg)
        loss.backward()

        # Clip the gradient norm before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        epoch_loss += loss.item()

    return epoch_loss / len(iterator)

In [122]:
def evaluate(model, iterator, criterion):
    """Compute the mean per-batch loss without updating the model.

    The model is put in evaluation mode and run under ``torch.no_grad()``.
    Each batch of descriptions serves as both input and target, and the model
    is called with a third positional argument of 0 (teacher forcing off).

    Args:
        model: module called as ``model(src, trg, 0)`` that returns per-step
            scores whose last dimension is the output vocabulary.
        iterator: sized iterable yielding ``(ignored, descriptions)`` pairs,
            where ``descriptions`` is a tensor of token ids.
        criterion: loss called as ``criterion(scores, targets)``.

    Returns:
        The sum of the batch losses divided by ``len(iterator)``.
    """
    model.eval()

    # Follow the model's own device instead of hard-coding CUDA, so the same
    # loop also works on CPU-only machines.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    epoch_loss = 0

    with torch.no_grad():

        for _, descriptions in iterator:
            if device is not None:
                descriptions = descriptions.to(device)

            # Autoencoder setup: source and target are the same tensor.
            src = trg = descriptions

            output = model(src, trg, 0)  # turn off teacher forcing

            # Shapes per the original notes (sequence-first) — TODO confirm
            # against the loader's batch layout:
            #   trg    = [trg sent len, batch size]
            #   output = [trg sent len, batch size, output dim]

            # Drop index 0 along the first dimension (presumably the <BOS>
            # position) and flatten so the criterion sees one row per token.
            # reshape is used rather than view so non-contiguous outputs work.
            output = output[1:].reshape(-1, output.shape[-1])
            trg = trg[1:].reshape(-1)

            #   trg    = [(trg sent len - 1) * batch size]
            #   output = [(trg sent len - 1) * batch size, output dim]

            loss = criterion(output, trg)

            epoch_loss += loss.item()

    return epoch_loss / len(iterator)

In [123]:
def epoch_time(start_time, end_time):
    """Split the span between two timestamps into whole minutes and seconds.

    Args:
        start_time: earlier timestamp, in seconds.
        end_time: later timestamp, in seconds.

    Returns:
        A ``(minutes, seconds)`` tuple of ints, each truncated toward zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - (whole_minutes * 60))
    return whole_minutes, leftover_seconds

In [125]:
# Training schedule: number of passes over the data and the gradient-norm cap
# handed to train().
N_EPOCHS = 40
CLIP = 1

# NOTE(review): initialised but never updated in this cell — no validation pass
# is run here, so it stays at +inf.
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    
    # Time one full training pass.
    start_time = time.time()
    
    train_loss = train(model, train_data_iterator, optimizer, criterion, CLIP)
    
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    # Report the mean batch loss and its exponential (perplexity).
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')

Epoch: 01 | Time: 0m 4s
	Train Loss: 4.398 | Train PPL:  81.327
Epoch: 02 | Time: 0m 4s
	Train Loss: 4.369 | Train PPL:  78.932
Epoch: 03 | Time: 0m 4s
	Train Loss: 4.347 | Train PPL:  77.225
Epoch: 04 | Time: 0m 4s
	Train Loss: 4.307 | Train PPL:  74.218
Epoch: 05 | Time: 0m 4s
	Train Loss: 4.272 | Train PPL:  71.635
Epoch: 06 | Time: 0m 4s
	Train Loss: 4.240 | Train PPL:  69.397
Epoch: 07 | Time: 0m 4s
	Train Loss: 4.213 | Train PPL:  67.542
Epoch: 08 | Time: 0m 3s
	Train Loss: 4.187 | Train PPL:  65.837
Epoch: 09 | Time: 0m 3s
	Train Loss: 4.155 | Train PPL:  63.722
Epoch: 10 | Time: 0m 3s
	Train Loss: 4.134 | Train PPL:  62.412
Epoch: 11 | Time: 0m 3s
	Train Loss: 4.114 | Train PPL:  61.163
Epoch: 12 | Time: 0m 3s
	Train Loss: 4.084 | Train PPL:  59.371
Epoch: 13 | Time: 0m 4s
	Train Loss: 4.069 | Train PPL:  58.490
Epoch: 14 | Time: 0m 4s
	Train Loss: 4.040 | Train PPL:  56.841
Epoch: 15 | Time: 0m 3s
	Train Loss: 4.033 | Train PPL:  56.423
Epoch: 16 | Time: 0m 3s
	Train Loss: 4.0