In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torchtext.datasets import TranslationDataset, Multi30k
from torchtext.data import Field, BucketIterator

import spacy
import numpy as np

import random
import math
import time

In [51]:
# Load the German and English spaCy pipelines once; tokenize_de / tokenize_en reuse them.
# NOTE(review): the 'de' / 'en' shortcut names are only accepted by older spaCy
# releases — confirm the pinned spaCy version before re-running.
spacy_de, spacy_en = (spacy.load(lang_code) for lang_code in ('de', 'en'))

def tokenize_de(text):
    """Split a German string into a list of token strings with the spaCy tokenizer."""
    tokens = []
    for tok in spacy_de.tokenizer(text):
        tokens.append(tok.text)
    return tokens

def tokenize_en(text):
    """Split an English string into a list of token strings with the spaCy tokenizer."""
    tokens = []
    for tok in spacy_en.tokenizer(text):
        tokens.append(tok.text)
    return tokens

# GRU Model

In [None]:
# Seed every RNG this notebook imports so that a fresh "Restart & Run All" starts
# from the same random state (random / numpy / torch were imported but never seeded).
# NOTE(review): GPU kernels may still be non-deterministic; see the PyTorch
# reproducibility notes if bit-exact reruns are required.
SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Source (German) field: tokenised with spaCy, lower-cased, wrapped in <sos>/<eos>.
SRC = Field(tokenize = tokenize_de, 
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = True)

# Target (English) field: same preprocessing with the English tokenizer.
TRG = Field(tokenize = tokenize_en, 
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = True)

# Multi30k German -> English splits, processed with the two fields above.
train_data, valid_data, test_data = Multi30k.splits(exts = ('.de', '.en'), 
                                                    fields = (SRC, TRG))

# Vocabularies are built from the training split only; min_freq = 2 is the
# frequency threshold handed to build_vocab.
SRC.build_vocab(train_data, min_freq = 2)
TRG.build_vocab(train_data, min_freq = 2)

# Prefer the GPU when one is visible to PyTorch.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

BATCH_SIZE = 128

# One iterator per split, each yielding batches placed on `device`.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = BATCH_SIZE,
    device = device)

In [55]:
%load_ext autoreload
%autoreload 2
from utilities import GRU

INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

model = GRU.Seq2Seq(input_dim=INPUT_DIM, 
                      enc_emb_dim=ENC_EMB_DIM, 
                      hid_dim=HID_DIM, 
                      enc_dropout=ENC_DROPOUT,
                      output_dim=OUTPUT_DIM, 
                      dec_emb_dim=DEC_EMB_DIM,
                      dec_dropout=DEC_DROPOUT,
                      device=device).to(device)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [56]:
def init_weights(m):
    """Draw the parameters owned directly by module ``m`` from N(0, 0.01**2).

    Intended to be passed to ``nn.Module.apply``, which calls it once for every
    submodule (and finally for the root). Iterating with ``recurse=False`` keeps
    each call to the parameters registered on ``m`` itself, so every parameter
    is initialised exactly once instead of once per enclosing module.

    Args:
        m: the ``nn.Module`` currently being visited.
    """
    for param in m.parameters(recurse=False):
        nn.init.normal_(param.data, mean=0, std=0.01)
        
# Run init_weights over the GRU seq2seq model; as the last expression of the
# cell, the value returned by apply() is echoed as the cell output.
model.apply(init_weights)

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(7855, 256)
    (rnn): GRU(256, 512)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder(
    (embedding): Embedding(5893, 256)
    (rnn): GRU(768, 512)
    (fc_out): Linear(in_features=1280, out_features=5893, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [57]:
# Index of the target padding token; those positions are excluded from the loss.
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

# Adam with its default hyper-parameters over all parameters of the GRU model.
optimizer = optim.Adam(model.parameters())

In [59]:
%load_ext autoreload
%autoreload 2
from utilities.TrainModel import TrainModel

train_GRU = TrainModel(model=model,
                         train_iterator=train_iterator,
                         val_iterator=valid_iterator,
                         optimizer=optimizer,
                         criterion=criterion,
                         model_type='')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [60]:
N_EPOCHS = 10
# Forwarded as `clip` to TrainModel.epoch — presumably the gradient-norm clipping
# threshold; confirm in utilities.TrainModel.
CLIP = 1
# Use a GRU-specific file name: 'att-model.pt' is the name the attention run
# further down also passes, so both runs would otherwise target the same file.
train_GRU.epoch(n_epochs=N_EPOCHS, clip=CLIP, model_name='gru-model.pt')

Epoch: 01 | Time: 0m 37s
	Train Loss: 5.036 | Train PPL: 153.924
	 Val. Loss: 5.335 |  Val. PPL: 207.373
Epoch: 02 | Time: 0m 37s
	Train Loss: 4.379 | Train PPL:  79.759
	 Val. Loss: 5.130 |  Val. PPL: 169.084
Epoch: 03 | Time: 0m 37s
	Train Loss: 4.086 | Train PPL:  59.488
	 Val. Loss: 4.786 |  Val. PPL: 119.875
Epoch: 04 | Time: 0m 37s
	Train Loss: 3.777 | Train PPL:  43.675
	 Val. Loss: 4.405 |  Val. PPL:  81.846
Epoch: 05 | Time: 0m 37s
	Train Loss: 3.490 | Train PPL:  32.777
	 Val. Loss: 4.246 |  Val. PPL:  69.842
Epoch: 06 | Time: 0m 38s
	Train Loss: 3.238 | Train PPL:  25.493
	 Val. Loss: 4.068 |  Val. PPL:  58.461
Epoch: 07 | Time: 0m 38s
	Train Loss: 2.960 | Train PPL:  19.295
	 Val. Loss: 3.857 |  Val. PPL:  47.344
Epoch: 08 | Time: 0m 37s
	Train Loss: 2.747 | Train PPL:  15.602
	 Val. Loss: 3.765 |  Val. PPL:  43.160
Epoch: 09 | Time: 0m 37s
	Train Loss: 2.525 | Train PPL:  12.493
	 Val. Loss: 3.758 |  Val. PPL:  42.843
Epoch: 10 | Time: 0m 38s
	Train Loss: 2.330 | Train PPL

# Attention Model

In [61]:
# Source field additionally returns sequence lengths (include_lengths=True).
SRC = Field(tokenize=tokenize_de,
            init_token='<sos>',
            eos_token='<eos>',
            lower=True,
            include_lengths=True)

# Target field: English tokenizer, lower-cased, wrapped in <sos>/<eos>.
TRG = Field(tokenize=tokenize_en,
            init_token='<sos>',
            eos_token='<eos>',
            lower=True)

# Reload Multi30k with the new fields.
train_data, valid_data, test_data = Multi30k.splits(
    exts=('.de', '.en'),
    fields=(SRC, TRG))

# Vocabularies come from the training split only, with min_freq = 2.
for field in (SRC, TRG):
    field.build_vocab(train_data, min_freq=2)

BATCH_SIZE = 128

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Batches are sorted internally by source length
# (presumably required by the attention encoder — confirm in utilities).
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=BATCH_SIZE,
    sort_within_batch=True,
    sort_key=lambda example: len(example.src),
    device=device)

In [62]:
%load_ext autoreload
%autoreload 2
from utilities import BiGRUwithAttention

INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
ENC_HID_DIM = 512
DEC_HID_DIM = 512
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]

model2 = BiGRUwithAttention.Seq2Seq(input_dim=INPUT_DIM, 
                                  enc_emb_dim=ENC_EMB_DIM, 
                                  enc_hid_dim=ENC_HID_DIM,
                                  enc_dropout=ENC_DROPOUT,
                                  output_dim=OUTPUT_DIM, 
                                  dec_emb_dim=DEC_EMB_DIM,
                                  dec_hid_dim=DEC_HID_DIM,
                                  dec_dropout=DEC_DROPOUT,
                                  src_pad_idx=SRC_PAD_IDX,
                                  device=device).to(device)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [63]:
def init_weights(m):
    """Draw the parameters owned directly by module ``m`` from N(0, 0.01**2).

    Intended to be passed to ``nn.Module.apply``, which calls it once for every
    submodule (and finally for the root). Iterating with ``recurse=False`` keeps
    each call to the parameters registered on ``m`` itself, so every parameter
    is initialised exactly once instead of once per enclosing module.

    Args:
        m: the ``nn.Module`` currently being visited.
    """
    for param in m.parameters(recurse=False):
        nn.init.normal_(param.data, mean=0, std=0.01)
        
# Re-initialise every parameter of the attention model.
model2.apply(init_weights)

# Index of the target padding token; those positions are excluded from the loss.
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

# Adam with its default hyper-parameters over all parameters of model2.
optimizer = optim.Adam(model2.parameters())

In [65]:
%load_ext autoreload
%autoreload 2
from utilities.TrainModel import TrainModel

train_AttentionGRU = TrainModel(model=model2,
                         train_iterator=train_iterator,
                         val_iterator=valid_iterator,
                         optimizer=optimizer,
                         criterion=criterion,
                         model_type='Attention')

N_EPOCHS = 10
CLIP = 1
train_AttentionGRU.epoch(n_epochs=N_EPOCHS, clip=CLIP,model_name='att-model.pt')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Epoch: 01 | Time: 0m 45s
	Train Loss: 5.008 | Train PPL: 149.661
	 Val. Loss: 4.730 |  Val. PPL: 113.332
Epoch: 02 | Time: 0m 45s
	Train Loss: 3.974 | Train PPL:  53.187
	 Val. Loss: 4.039 |  Val. PPL:  56.782
Epoch: 03 | Time: 0m 45s
	Train Loss: 3.301 | Train PPL:  27.149
	 Val. Loss: 3.588 |  Val. PPL:  36.159
Epoch: 04 | Time: 0m 46s
	Train Loss: 2.805 | Train PPL:  16.533
	 Val. Loss: 3.381 |  Val. PPL:  29.400
Epoch: 05 | Time: 0m 46s
	Train Loss: 2.435 | Train PPL:  11.413
	 Val. Loss: 3.309 |  Val. PPL:  27.370
Epoch: 06 | Time: 0m 46s
	Train Loss: 2.155 | Train PPL:   8.626
	 Val. Loss: 3.301 |  Val. PPL:  27.144
Epoch: 07 | Time: 0m 45s
	Train Loss: 1.927 | Train PPL:   6.869
	 Val. Loss: 3.206 |  Val. PPL:  24.687
Epoch: 08 | Time: 0m 45s
	Train Loss: 1.747 | Train PPL:   5.737
	 Val. Loss: 3.298 |  Val. PPL:  27.070
Epoch: 09 | Time: 0m 45s
	Train Loss: 1.566 | Train PPL:   4.790
	 Val. 

# Transformer

In [73]:
# Both fields use batch_first=True for the Transformer section.
SRC = Field(tokenize=tokenize_de,
            init_token='<sos>',
            eos_token='<eos>',
            lower=True,
            batch_first=True)

TRG = Field(tokenize=tokenize_en,
            init_token='<sos>',
            eos_token='<eos>',
            lower=True,
            batch_first=True)

# Reload Multi30k with the batch-first fields.
train_data, valid_data, test_data = Multi30k.splits(
    exts=('.de', '.en'),
    fields=(SRC, TRG))

# Vocabularies come from the training split only, with min_freq = 2.
for field in (SRC, TRG):
    field.build_vocab(train_data, min_freq=2)

BATCH_SIZE = 128

# `device` is the one selected in an earlier cell.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=BATCH_SIZE,
    device=device)

In [74]:
%load_ext autoreload
%autoreload 2
from utilities import Transformer

INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)
ENC_HID_DIM = 256
DEC_HID_DIM = 256
ENC_LAYERS = 3
DEC_LAYERS = 3
ENC_HEADS = 8
DEC_HEADS = 8
ENC_PF_DIM = 512
DEC_PF_DIM = 512
ENC_DROPOUT = 0.1
DEC_DROPOUT = 0.1
SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

model3 = Transformer.Seq2Seq(input_dim=INPUT_DIM, 
                                  enc_hid_dim=ENC_HID_DIM,
                                  enc_layers=ENC_LAYERS,
                                  enc_heads=ENC_HEADS,
                                  enc_pf_dim=ENC_PF_DIM,
                                  enc_dropout=ENC_DROPOUT,
                                  output_dim=OUTPUT_DIM,
                                  dec_hid_dim=DEC_HID_DIM,
                                  dec_layers=DEC_LAYERS,
                                  dec_heads=DEC_HEADS,
                                  dec_pf_dim=DEC_PF_DIM,
                                  dec_dropout=DEC_DROPOUT,
                                  src_pad_idx=SRC_PAD_IDX,
                                  trg_pad_idx=TRG_PAD_IDX, 
                                  device=device).to(device)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [75]:
def init_weights(m):
    """Draw the parameters owned directly by module ``m`` from N(0, 0.01**2).

    Intended to be passed to ``nn.Module.apply``, which calls it once for every
    submodule (and finally for the root). Iterating with ``recurse=False`` keeps
    each call to the parameters registered on ``m`` itself, so every parameter
    is initialised exactly once instead of once per enclosing module.

    NOTE(review): every parameter is treated alike, so if the Transformer
    contains normalisation layers their scale/bias are also drawn from this
    distribution — confirm that is intended.

    Args:
        m: the ``nn.Module`` currently being visited.
    """
    for param in m.parameters(recurse=False):
        nn.init.normal_(param.data, mean=0, std=0.01)
        
# Re-initialise every parameter of the Transformer model.
model3.apply(init_weights)

# Index of the target padding token; those positions are excluded from the loss.
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

# Adam with its default hyper-parameters over all parameters of model3.
optimizer = optim.Adam(model3.parameters())

In [76]:
%load_ext autoreload
%autoreload 2
from utilities.TrainModel import TrainModel

train_Transformer = TrainModel(model=model3,
                         train_iterator=train_iterator,
                         val_iterator=valid_iterator,
                         optimizer=optimizer,
                         criterion=criterion,
                         model_type='Transformer')

N_EPOCHS = 20
CLIP = 1
train_Transformer.epoch(n_epochs=N_EPOCHS, clip=CLIP, model_name='transformer-model.pt')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Epoch: 01 | Time: 0m 16s
	Train Loss: 5.552 | Train PPL: 257.858
	 Val. Loss: 3.938 |  Val. PPL:  51.311
Epoch: 02 | Time: 0m 17s
	Train Loss: 3.720 | Train PPL:  41.244
	 Val. Loss: 3.369 |  Val. PPL:  29.057
Epoch: 03 | Time: 0m 17s
	Train Loss: 3.275 | Train PPL:  26.452
	 Val. Loss: 3.036 |  Val. PPL:  20.821
Epoch: 04 | Time: 0m 17s
	Train Loss: 2.951 | Train PPL:  19.116
	 Val. Loss: 2.781 |  Val. PPL:  16.138
Epoch: 05 | Time: 0m 17s
	Train Loss: 2.669 | Train PPL:  14.425
	 Val. Loss: 2.544 |  Val. PPL:  12.727
Epoch: 06 | Time: 0m 17s
	Train Loss: 2.387 | Train PPL:  10.877
	 Val. Loss: 2.303 |  Val. PPL:  10.008
Epoch: 07 | Time: 0m 17s
	Train Loss: 2.101 | Train PPL:   8.178
	 Val. Loss: 2.107 |  Val. PPL:   8.224
Epoch: 08 | Time: 0m 17s
	Train Loss: 1.862 | Train PPL:   6.436
	 Val. Loss: 1.978 |  Val. PPL:   7.225
Epoch: 09 | Time: 0m 17s
	Train Loss: 1.670 | Train PPL:   5.314
	 Val. 