In [1]:
import numpy as np
import csv
import torch

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('/data/rgur/courses/cs_7643_deep_learning/hw4/')

# Just run this block. Please do not modify the following code.
import math
import time

# Pytorch package
import torch
import torch.nn as nn
import torch.optim as optim

# Torchtest package
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator, Example, TabularDataset, interleave_keys, LabelField

# Tqdm progress bar
from tqdm import tqdm_notebook, tqdm

# Code provide to you for training and evaluation
#from hw4_code.utils import train, evaluate, set_seed_nb, unit_test_values
from utils import qe_train, qe_evaluate

import importlib

# Check device availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("You are using device: %s" % device)

You are using device: cuda


In [2]:
# You don't need to modify any code in this block

# Define the maximum length of the sentence. Shorter sentences will be padded to that length and longer sentences will be croped. Given that the average length of the sentence in the corpus is around 13, we can set it to 20
MAX_LEN = 20

# Define the source and target language
SRC = Field(tokenize = "spacy",
            tokenizer_language="ro_core_news_sm",
            init_token = '<sos>',
            eos_token = '<eos>',
            fix_length = MAX_LEN,
            lower = True)

TRG = Field(tokenize = "spacy",
            tokenizer_language="en",
            init_token = '<sos>',
            eos_token = '<eos>',
            fix_length = MAX_LEN,
            lower = True)

Z = LabelField(dtype=torch.float, batch_first=True, use_vocab=False)

train_data = TabularDataset(path='/data/rgur/courses/cs_7643_deep_learning/project/mlqe/data/ro-en/train.roen.df.short.tsv',format='TSV',fields={'original':('src',SRC),
'translation':('trg',TRG),
'z_mean':('z',Z)})

val_data = TabularDataset(path='/data/rgur/courses/cs_7643_deep_learning/project/mlqe/data/ro-en/dev.roen.df.short.tsv',format='TSV',fields={'original':('src',SRC),
'translation':('trg',TRG),
'z_mean':('z',Z)})

# Define Batchsize
BATCH_SIZE = 128

# Build the vocabulary associated with each language
SRC.build_vocab(train_data, min_freq = 2)
TRG.build_vocab(train_data, min_freq = 2)

# Get the padding index to be ignored later in loss calculation
PAD_IDX = TRG.vocab.stoi['<pad>']

# Get data-loaders using BucketIterator
train_loader = BucketIterator( #Defines an iterator that batches examples of similar lengths together.
    train_data,
    batch_size = BATCH_SIZE, device = device, sort_key=lambda x: interleave_keys(len(x.src), len(x.trg)))

val_loader = BucketIterator( #Defines an iterator that batches examples of similar lengths together.
    val_data,
    batch_size = BATCH_SIZE, device = device, sort_key=lambda x: interleave_keys(len(x.src), len(x.trg)))

# Get the input and the output sizes for model
input_size = len(SRC.vocab)
output_size = len(TRG.vocab)



In [3]:
class QEModel(nn.Module):
    """
    A single-layer Transformer which encodes a sequence of text and 
    performs binary classification.

    The model has a vocab size of V, works on
    sequences of length T, has an hidden dimension of H, uses word vectors
    also of dimension H, and operates on minibatches of size N.
    """
    def __init__(self, input_size=256*20):
        super(QEModel, self).__init__()
        self.lin1 = nn.Linear(256*20,1028)
        self.lin2 = nn.Linear(1028,1)
    
    def forward(self, x):
        x = nn.functional.relu( self.lin1(x) )
        x = nn.functional.dropout(x,p=.2)
        return self.lin2(x)

In [4]:
qe_model = QEModel().to(device)

In [5]:
#importlib.reload(hw4_code.models.Transformer)
from hw4_code.models.Transformer import TransformerTranslator
source_model = TransformerTranslator(output_size, input_size, device, max_length = MAX_LEN).to(device)
target_model = TransformerTranslator(input_size, output_size, device, max_length = MAX_LEN).to(device)

In [6]:
source_model.load_state_dict(torch.load('/data/rgur/courses/cs_7643_deep_learning/project/en_ro.pt'))
target_model.load_state_dict(torch.load('/data/rgur/courses/cs_7643_deep_learning/project/ro_en.pt'))

<All keys matched successfully>

In [7]:
# Hyperparameters
learning_rate = .001
EPOCHS = 25

# Model


# optimizer = optim.Adam(model.parameters(), lr = learning_rate)
optimizer = torch.optim.Adam(qe_model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
criterion = nn.MSELoss()

In [13]:
for epoch_idx in range(EPOCHS):
    print("-----------------------------------")
    print("Epoch %d" % (epoch_idx+1))
    print("-----------------------------------")
    
    train_loss, avg_train_loss = qe_train(qe_model, source_model, target_model, train_loader, optimizer, criterion, scheduler = None)
    train_loss = train_loss.item()
    scheduler.step(train_loss)

    #val_loss, avg_val_loss = evaluate(trans_model, val_loader, criterion)

    avg_train_loss = avg_train_loss.item()
    val_loss, avg_val_loss, r2, mae = qe_evaluate(qe_model, source_model, target_model, val_loader, criterion)
    #avg_val_loss = 0
    avg_val_loss = avg_val_loss.item()

    print("Training Loss: %.4f. Validation RMSE: %.4f. Validation R2: %.4f. Validation MAE %.4f" % (np.sqrt(avg_train_loss), np.sqrt(avg_val_loss), r2, mae))

-----------------------------------
Epoch 1
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.7939, -0.8459,  0.0559, -0.0794,  0.8472], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([0.8464, 0.6517, 0.8446, 0.8611, 0.5942], device='cuda:0')
Training Loss: 0.5842. Validation Loss: 0.9382. 
-----------------------------------
Epoch 2
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.0605,  0.6486,  0.2190, -0.3339, -0.9219], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([ 0.0822, -0.0867,  0.6109, -0.9202, -0.2225], device='cuda:0')
Training Loss: 0.5779. Validation Loss: 1.0057. 
-----------------------------------
Epoch 3
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.0642, -0.5360,  0.5206,  0.2468,  0.0642], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([ 0.1547,  0.5908,  1.0286,  0.9036, -1.8131], device='cuda:0')
Training Loss: 0.5665. Validation Loss: 0.9892. 
-----------------------------------
Epoch 4
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.3770, -1.0760, -0.6581,  0.5495,  1.1624], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-0.3768,  0.7346,  0.9151,  0.9233,  0.8853], device='cuda:0')
Training Loss: 0.5700. Validation Loss: 0.9677. 
-----------------------------------
Epoch 5
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([-0.3450,  0.9693,  0.0054, -0.5569,  0.2342], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-0.3250, -0.0823,  0.9379,  0.5809, -0.7997], device='cuda:0')
Training Loss: 0.5620. Validation Loss: 0.9730. 
-----------------------------------
Epoch 6
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([-1.4550,  0.0772, -0.3435,  0.9533, -0.3748], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-2.4535,  1.0141,  0.9036, -2.3172, -1.3027], device='cuda:0')
Training Loss: 0.5694. Validation Loss: 1.0558. 
-----------------------------------
Epoch 7
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.0819, -0.4428, -0.4584,  0.2572,  0.0819], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([ 1.0124,  0.8918, -0.2678, -1.2714,  0.9957], device='cuda:0')
Training Loss: 0.5538. Validation Loss: 0.9636. 
-----------------------------------
Epoch 8
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([-0.2315,  0.0861,  0.4705, -0.6416,  0.0252], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([ 0.9761,  0.8218,  0.1913, -1.1457, -0.6204], device='cuda:0')
Training Loss: 0.5516. Validation Loss: 0.9921. 
-----------------------------------
Epoch 9
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.3157,  0.5977,  0.2537,  0.0907, -0.2794], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([ 0.3378,  0.9167, -0.9097, -1.0627,  0.3284], device='cuda:0')
Training Loss: 0.5563. Validation Loss: 0.9820. 
-----------------------------------
Epoch 10
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([-0.4067,  0.0954, -0.3968,  0.6820,  0.0954], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-0.8477,  0.8102, -1.2777, -0.4640, -1.1226], device='cuda:0')
Training Loss: 0.5470. Validation Loss: 0.9970. 
-----------------------------------
Epoch 11
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([-1.6888,  0.1017,  0.1017, -0.7516,  0.4890], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-0.9555,  0.8039,  0.7947, -0.7703, -0.6692], device='cuda:0')
Training Loss: 0.5492. Validation Loss: 0.9538. 
-----------------------------------
Epoch 12
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([-0.3189, -0.8444,  1.2412,  0.5323, -0.7384], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-0.3113,  0.6517,  0.9584, -1.0702,  0.9103], device='cuda:0')
Training Loss: 0.5497. Validation Loss: 0.9875. 
-----------------------------------
Epoch 13
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.1090, -0.3017,  0.4913, -1.1920, -0.7420], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-1.3250, -0.7916, -0.6923,  0.9087,  0.7832], device='cuda:0')
Training Loss: 0.5378. Validation Loss: 0.9889. 
-----------------------------------
Epoch 14
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.6588,  0.9149, -0.7182, -1.1437,  0.0726], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-0.1077,  0.8427, -0.4129, -0.2316,  0.2152], device='cuda:0')
Training Loss: 0.5313. Validation Loss: 1.0924. 
-----------------------------------
Epoch 15
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.1151,  0.7393, -0.2762, -0.2702,  0.0521], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-0.3663,  0.6868, -0.1279, -0.9767, -0.3980], device='cuda:0')
Training Loss: 0.5295. Validation Loss: 0.9717. 
-----------------------------------
Epoch 16
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([-0.7281, -0.5970, -0.2317,  1.1929,  0.0771], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([ 0.9288,  0.5648,  0.7696, -2.3172,  0.9797], device='cuda:0')
Training Loss: 0.5343. Validation Loss: 1.0221. 
-----------------------------------
Epoch 17
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.0489,  0.3968, -0.4464, -0.3781,  0.1247], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([ 0.9154, -0.2613, -1.4028,  0.8098,  0.6234], device='cuda:0')
Training Loss: 0.5379. Validation Loss: 0.9672. 
-----------------------------------
Epoch 18
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.1290, -0.6467,  0.4364,  0.3864, -0.2959], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-1.0359,  0.5426,  0.9624,  0.8966, -0.4021], device='cuda:0')
Training Loss: 0.5338. Validation Loss: 1.0097. 
-----------------------------------
Epoch 19
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.1368, -0.4321, -0.0563,  0.3369,  0.7021], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([ 0.3879, -1.4028, -2.5997, -0.5241,  1.0561], device='cuda:0')
Training Loss: 0.5308. Validation Loss: 1.0640. 
-----------------------------------
Epoch 20
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.0334,  0.1358, -0.2127, -0.5306,  0.1358], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([ 0.4616,  0.2037, -0.1854, -0.6842, -0.0464], device='cuda:0')
Training Loss: 0.5384. Validation Loss: 1.0492. 
-----------------------------------
Epoch 21
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([ 0.1392, -0.1859, -0.1111,  0.3832, -1.8208], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([ 0.7052,  0.0950, -0.8332, -2.2937,  0.6523], device='cuda:0')
Training Loss: 0.5404. Validation Loss: 0.9650. 
-----------------------------------
Epoch 22
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([-0.9759,  0.4458, -0.1753,  0.0752,  0.1428], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-2.4080, -0.4405,  0.9185, -0.4319, -0.0356], device='cuda:0')
Training Loss: 0.5217. Validation Loss: 1.0437. 
-----------------------------------
Epoch 23
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([-0.4285,  0.5735,  0.1438, -0.1483,  0.2890], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-0.6159,  0.9535,  0.1559, -0.5333,  0.9010], device='cuda:0')
Training Loss: 0.5190. Validation Loss: 1.0659. 
-----------------------------------
Epoch 24
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([-0.9348,  1.0479, -0.0822, -0.5423,  1.3441], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-1.8307,  1.0561, -1.0570, -1.6819,  1.0055], device='cuda:0')
Training Loss: 0.5254. Validation Loss: 1.0184. 
-----------------------------------
Epoch 25
-----------------------------------


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

preds: tensor([-0.6575,  0.1499,  0.1089,  0.3750,  0.3917], device='cuda:0',
       grad_fn=<SliceBackward>)
label: tensor([-0.2678, -0.3062, -0.8595,  0.1183, -0.3075], device='cuda:0')
Training Loss: 0.5189. Validation Loss: 1.0658. 
