In [1]:
import numpy as np
import csv
import torch

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('/data/rgur/courses/cs_7643_deep_learning/hw4/')

# Just run this block. Please do not modify the following code.
import math
import time

# Pytorch package
import torch
import torch.nn as nn
import torch.optim as optim

# Torchtext package
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator, Example, TabularDataset, interleave_keys

# Tqdm progress bar
from tqdm import tqdm_notebook, tqdm

# Code provided to you for training and evaluation
from hw4_code.utils import train, evaluate, set_seed_nb, unit_test_values

# Check device availability
# Use CUDA when PyTorch reports it as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("You are using device: %s" % device)

You are using device: cuda


# En -> De

In [2]:
# You don't need to modify any code in this block

# Maximum sentence length. Shorter sentences will be padded to that length and longer
# sentences will be cropped. Given that the average length of a sentence in the corpus
# is around 13, we can set it to 20.
MAX_LEN = 20


def _make_field(language):
    """Build a lower-casing, spaCy-tokenized Field for ``language``.

    The field is configured with '<sos>' / '<eos>' as its init and end-of-sequence
    tokens and with ``fix_length=MAX_LEN``.
    """
    return Field(
        tokenize="spacy",
        tokenizer_language=language,
        init_token='<sos>',
        eos_token='<eos>',
        fix_length=MAX_LEN,
        lower=True,
    )


# Define the source (English) and target (German) language fields
SRC = _make_field("en")
TRG = _make_field("de")



In [3]:
def _load_tsv_pairs(path):
    """Load a TSV file as a TabularDataset of (src, trg) sentence pairs.

    The 'original' column is bound to the ``src`` attribute via SRC and the
    'translation' column to the ``trg`` attribute via TRG.
    """
    column_map = {
        'original': ('src', SRC),
        'translation': ('trg', TRG),
    }
    return TabularDataset(path=path, format='TSV', fields=column_map)


train_data = _load_tsv_pairs('/data/rgur/courses/cs_7643_deep_learning/project/mlqe/data/en-de/train.ende.df.short.tsv')

val_data = _load_tsv_pairs('/data/rgur/courses/cs_7643_deep_learning/project/mlqe/data/en-de/dev.ende.df.short.tsv')



In [4]:
# Define Batchsize
BATCH_SIZE = 128

# Build the vocabulary associated with each language, using the training split only
for _field in (SRC, TRG):
    _field.build_vocab(train_data, min_freq=2)

# Index of the '<pad>' token in the target vocabulary, to be ignored later in loss calculation
PAD_IDX = TRG.vocab.stoi['<pad>']

In [5]:
def _bucketed_loader(dataset):
    """Wrap ``dataset`` in a BucketIterator (batches examples of similar lengths together).

    Examples are keyed on the interleaved source/target token counts; batches hold
    BATCH_SIZE examples and are placed on ``device``.
    """
    return BucketIterator(
        dataset,
        batch_size=BATCH_SIZE,
        device=device,
        sort_key=lambda example: interleave_keys(len(example.src), len(example.trg)),
    )


# Get data-loaders using BucketIterator
train_loader = _bucketed_loader(train_data)
val_loader = _bucketed_loader(val_data)

# Get the input and the output sizes for model (source / target vocabulary sizes)
input_size, output_size = len(SRC.vocab), len(TRG.vocab)



In [6]:
# Display the model's input and output sizes as a pair
(input_size, output_size)

(9850, 9214)

In [7]:
from hw4_code.models.Transformer import TransformerTranslator

In [8]:
# Hyperparameters
EPOCHS = 250
learning_rate = 1e-3

# Model: build the translator, then move it to the selected device
trans_model = TransformerTranslator(input_size, output_size, device, max_length=MAX_LEN)
trans_model = trans_model.to(device)

# Adam over all model parameters; the plateau scheduler is created with its default settings
optimizer = optim.Adam(trans_model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)

# Cross-entropy loss that skips targets equal to the padding index
criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

In [9]:
# Train for EPOCHS epochs, checkpointing the weights whenever the average
# validation loss reaches a new minimum.
best_avg_val_loss = np.inf
for epoch_idx in range(EPOCHS):
    print("-----------------------------------")
    print("Epoch %d" % (epoch_idx+1))
    print("-----------------------------------")

    train_loss, avg_train_loss = train(trans_model, train_loader, optimizer, criterion)
    val_loss, avg_val_loss = evaluate(trans_model, val_loader, criterion)

    # Convert the averaged losses to plain Python floats for logging and comparison.
    avg_train_loss = avg_train_loss.item()
    avg_val_loss = avg_val_loss.item()

    # Step the plateau scheduler on the *validation* loss. Stepping on the training
    # loss (as before) never triggers a reduction once the model starts to overfit,
    # because the training loss keeps decreasing while the validation loss stalls.
    scheduler.step(avg_val_loss)

    print("Training Loss: %.4f. Validation Loss: %.4f. " % (avg_train_loss, avg_val_loss))
    print("Training Perplexity: %.4f. Validation Perplexity: %.4f. " % (np.exp(avg_train_loss), np.exp(avg_val_loss)))

    # Keep only the best-so-far weights on disk.
    if avg_val_loss < best_avg_val_loss:
        best_avg_val_loss = avg_val_loss
        torch.save(trans_model.state_dict(), 'en_de.pt')
        print('Best Model Saved')

-----------------------------------
Epoch 1
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]



  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 5.9312. Validation Loss: 4.4111. 
Training Perplexity: 376.6132. Validation Perplexity: 82.3596. 
Best Model Saved
-----------------------------------
Epoch 2
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 4.8631. Validation Loss: 4.1526. 
Training Perplexity: 129.4248. Validation Perplexity: 63.5996. 
Best Model Saved
-----------------------------------
Epoch 3
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 4.5072. Validation Loss: 3.9801. 
Training Perplexity: 90.6637. Validation Perplexity: 53.5231. 
Best Model Saved
-----------------------------------
Epoch 4
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 4.1804. Validation Loss: 3.8724. 
Training Perplexity: 65.3921. Validation Perplexity: 48.0560. 
Best Model Saved
-----------------------------------
Epoch 5
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.8701. Validation Loss: 3.7758. 
Training Perplexity: 47.9452. Validation Perplexity: 43.6342. 
Best Model Saved
-----------------------------------
Epoch 6
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.5868. Validation Loss: 3.7508. 
Training Perplexity: 36.1168. Validation Perplexity: 42.5555. 
Best Model Saved
-----------------------------------
Epoch 7
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.3350. Validation Loss: 3.7011. 
Training Perplexity: 28.0775. Validation Perplexity: 40.4931. 
Best Model Saved
-----------------------------------
Epoch 8
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 3.1047. Validation Loss: 3.6675. 
Training Perplexity: 22.3028. Validation Perplexity: 39.1551. 
Best Model Saved
-----------------------------------
Epoch 9
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.9151. Validation Loss: 3.6777. 
Training Perplexity: 18.4507. Validation Perplexity: 39.5561. 
-----------------------------------
Epoch 10
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.7347. Validation Loss: 3.6372. 
Training Perplexity: 15.4046. Validation Perplexity: 37.9869. 
Best Model Saved
-----------------------------------
Epoch 11
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.5706. Validation Loss: 3.6400. 
Training Perplexity: 13.0731. Validation Perplexity: 38.0904. 
-----------------------------------
Epoch 12
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.3868. Validation Loss: 3.6792. 
Training Perplexity: 10.8781. Validation Perplexity: 39.6159. 
-----------------------------------
Epoch 13
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.1985. Validation Loss: 3.7064. 
Training Perplexity: 9.0111. Validation Perplexity: 40.7053. 
-----------------------------------
Epoch 14
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 2.0637. Validation Loss: 3.7463. 
Training Perplexity: 7.8751. Validation Perplexity: 42.3635. 
-----------------------------------
Epoch 15
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.9128. Validation Loss: 3.7386. 
Training Perplexity: 6.7723. Validation Perplexity: 42.0400. 
-----------------------------------
Epoch 16
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.7876. Validation Loss: 3.8048. 
Training Perplexity: 5.9750. Validation Perplexity: 44.9171. 
-----------------------------------
Epoch 17
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Training Loss: 1.6756. Validation Loss: 3.8655. 
Training Perplexity: 5.3422. Validation Perplexity: 47.7285. 
-----------------------------------
Epoch 18
-----------------------------------


  0%|          | 0/55 [00:00<?, ?it/s]