In [1]:
import torch
from torch import optim
import torch.nn as nn
import torch.nn.functional as F

from encoder import *
from AttnDecoder import * 
from seq2seq import *

from build_dataset import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load data
# Locations of the pre-processed parallel text files. Each dict is handed to
# QAPair as-is, so the 'source' / 'target' keys must stay unchanged.
train_file_path = {
    'source': "data/processed/src-train.txt",
    'target': "data/processed/tgt-train.txt",
}

test_file_path = {
    'source': "data/processed/src-test.txt",
    'target': "data/processed/tgt-test.txt",
}

train_dataset = QAPair(train_file_path)
test_dataset = QAPair(test_file_path)

# Single knob for both loaders so train and eval batches stay the same size.
BATCH_SIZE = 128

# NOTE(review): DataLoader and partial are not imported explicitly in this
# notebook; presumably they come in through the star imports -- verify.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,  # reshuffle the training pairs every epoch
    collate_fn=partial(pad_collate_fn, pad_token=train_dataset.pad_idx),
)

# Evaluation data is iterated in a fixed order: shuffling brings no benefit
# here and, because the loss is averaged per batch, a shuffled (smaller) last
# batch makes the reported validation loss vary between otherwise identical
# runs.
# NOTE(review): padding uses test_dataset.pad_idx while the loss ignores
# train_dataset.pad_idx -- confirm both datasets share the same pad index.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=partial(pad_collate_fn, pad_token=test_dataset.pad_idx),
)

In [3]:
# Pre-computed embedding objects loaded from disk and cast with .float().
# The dict is passed whole to Seq2Seq; 'enc' / 'dec' presumably select the
# encoder-side and decoder-side embeddings -- confirm against Seq2Seq.
pretrained_vectors = {
    'enc': torch.load('embeddings/encoder_emb.pt').float(),
    'dec': torch.load('embeddings/decoder_emb.pt').float(),
}

# Number of entries in the question vocabulary; passed to Seq2Seq as
# output_size.
output_size = len(train_dataset.question_vocab)

# Not referenced by the cells visible here; presumably the width of the
# pre-trained embeddings -- TODO confirm.
# (An input_size derived from train_dataset.answer_vocab is currently unused.)
embed_size = 50


In [9]:
def train_step(batch, model, optimizer, criterion, device):
    """Perform one optimisation step on a single batch.

    Args:
        batch: Object exposing ``input_vecs``, ``input_lens``, ``target_vecs``
            and ``target_lens``; each is moved to ``device`` with ``.to``.
        model: Called as ``model(src, tgt, src_lens, tgt_lens)``; must return
            a 2-item sequence whose first item is the decoder output fed to
            ``criterion``. Switched to training mode by this function.
        optimizer: Optimizer whose gradients are cleared and which is stepped
            once.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        device: Device the batch tensors are moved to.

    Returns:
        The batch loss as a Python number (``loss.item()``).
    """
    model.train()

    src = batch.input_vecs.to(device)
    src_lens = batch.input_lens.to(device)
    tgt = batch.target_vecs.to(device)
    tgt_lens = batch.target_lens.to(device)

    # Drop gradients left over from the previous step before building new ones.
    optimizer.zero_grad()

    # The second return value (decoder hidden state) is not needed here.
    log_probs, _ = model(src, tgt, src_lens, tgt_lens)

    # Dims 1 and 2 are swapped before the loss; assumes log_probs is
    # (batch, seq, vocab) so the class dim lands on axis 1 -- TODO confirm.
    batch_loss = criterion(log_probs.transpose(1, 2), tgt)
    batch_loss.backward()
    optimizer.step()

    return batch_loss.item()

def eval_step(batch, model, criterion, device):
    """Compute the loss of a single batch without updating the model.

    The forward pass and loss run with autograd disabled inside this
    function, so no graph is built even when the caller forgets to wrap the
    call in ``torch.no_grad()``. The previous grad mode is restored on exit.

    Args:
        batch: Object exposing ``input_vecs``, ``input_lens``, ``target_vecs``
            and ``target_lens``; each is moved to ``device`` with ``.to``.
        model: Called as ``model(src, tgt, src_lens, tgt_lens)``; must return
            a 2-item sequence whose first item is the decoder output fed to
            ``criterion``. Left in evaluation mode when this function returns.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        device: Device the batch tensors are moved to.

    Returns:
        The batch loss as a Python number (``loss.item()``).
    """
    src = batch.input_vecs.to(device)
    src_lens = batch.input_lens.to(device)
    tgt = batch.target_vecs.to(device)
    tgt_lens = batch.target_lens.to(device)

    model.eval()

    with torch.no_grad():
        # The second return value (decoder hidden state) is not needed here.
        log_probs, _ = model(src, tgt, src_lens, tgt_lens)

        # Dims 1 and 2 are swapped before the loss; assumes log_probs is
        # (batch, seq, vocab) so the class dim lands on axis 1 -- TODO confirm.
        loss = criterion(log_probs.transpose(1, 2), tgt)

    return loss.item()
    

In [11]:
from tqdm import tqdm
import numpy as np  # np.mean is used below; do not rely on star imports for it

# TRAIN LOOP

# Hyperparameters gathered in one place so they are easy to find and tune.
NUM_EPOCHS = 15
HIDDEN_SIZE = 600
LEARNING_RATE = 0.1

device = "cuda" if torch.cuda.is_available() else "cpu"

seq2seq = Seq2Seq(pretrained_vectors, hidden_size=HIDDEN_SIZE, output_size=output_size)
# Move the model before building the optimizer so the optimizer is created
# over the parameters as they live on the target device.
seq2seq.to(device)

# Bound to `optimizer`, not `optim`: the first cell imports the torch.optim
# module under the name `optim`, and rebinding it here would clobber that
# module for every cell run afterwards.
optimizer = torch.optim.SGD(seq2seq.parameters(), lr=LEARNING_RATE)
# Positions whose target equals the training pad index do not contribute to
# the loss.
criterion = nn.NLLLoss(ignore_index=train_dataset.pad_idx)

# Per-epoch average losses, kept for plotting.
plot_cache = {'train': [], 'val': []}

for epoch in tqdm(range(NUM_EPOCHS)):
    # ---- train ----
    train_losses = []
    # tqdm wraps the loader directly (not enumerate(loader)) so it can read
    # the loader's length and display a total / ETA.
    for batch in tqdm(train_dataloader, leave=False):
        train_losses.append(train_step(batch, seq2seq, optimizer, criterion, device))
    avg_train_loss = np.mean(train_losses)
    print(f'Train loss after epoch {epoch+1} = {avg_train_loss}')

    # ---- validate ----
    val_losses = []
    with torch.no_grad():  # no gradients are needed for evaluation
        for batch in tqdm(test_dataloader, leave=False):
            val_losses.append(eval_step(batch, seq2seq, criterion, device))
    avg_val_loss = np.mean(val_losses)
    print(f'Val loss after epoch {epoch+1} = {avg_val_loss}')

    plot_cache['train'].append(avg_train_loss)
    plot_cache['val'].append(avg_val_loss)

    


  0%|          | 0/15 [00:00<?, ?it/s]

Train loss after epoch 1 = 6.846639656544597


  7%|▋         | 1/15 [01:17<18:10, 77.90s/it]

Val loss after epoch 1 = 5.869497581194806


  7%|▋         | 1/15 [02:22<33:09, 142.08s/it]


KeyboardInterrupt: 