In [1]:
# Initialization Cell
# Root directory of the Europarl French-English corpus.  The location can be
# changed without editing the notebook by exporting EUROPARL_DATA_DIR; when
# that variable is unset the original hard-coded path is used, so existing
# setups keep working unchanged.
import os

path_data = os.environ.get('EUROPARL_DATA_DIR',
                           '/home/compling6/knemoto/Datasets/Europarl_fr-en')

In [2]:
import models.Model_Evaluation as me
from models.TrainModel import TrainModel
from torchtext.data import Field, BucketIterator
from models.MyTranslationDataset import prepareTranslationData
from torchtext.datasets import TranslationDataset

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import time

In [3]:
import spacy

# Load the English and French spaCy pipelines (English first); the helper
# functions defined in this cell only use their `.tokenizer` attribute.
spacy_en, spacy_fr = spacy.load('en'), spacy.load('fr')

def tokenize_fr(text):
    """Return the list of token strings the French spaCy tokenizer yields for ``text``."""
    pieces = []
    for token in spacy_fr.tokenizer(text):
        pieces.append(token.text)
    return pieces

def tokenize_en(text):
    """Return the list of token strings the English spaCy tokenizer yields for ``text``."""
    pieces = []
    for token in spacy_en.tokenizer(text):
        pieces.append(token.text)
    return pieces

# Batch size passed as `batch_size` to every BucketIterator.splits call in this notebook.
BATCH_SIZE = 8

# GRU

In [4]:
# Source (French) field: tokenised with tokenize_fr, lower-cased, and wrapped
# in <sos>/<eos> markers.
SRC = Field(
    tokenize=tokenize_fr,
    lower=True,
    init_token='<sos>',
    eos_token='<eos>',
)

# Target (English) field, configured the same way but with the English tokenizer.
TRG = Field(
    tokenize=tokenize_en,
    lower=True,
    init_token='<sos>',
    eos_token='<eos>',
)

# Load the train / validation / test splits of the parallel corpus stored
# under path_data; the '.fr' side is bound to SRC and the '.en' side to TRG.
train, val, test = TranslationDataset.splits(
    path=path_data,
    exts=('.fr', '.en'),
    fields=(SRC, TRG),
)

# Vocabularies are built from the training split only, with a minimum
# token frequency of 2.
SRC.build_vocab(train, min_freq=2)
TRG.build_vocab(train, min_freq=2)

In [5]:
# Prefer the GPU when PyTorch can see one, otherwise run on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# One iterator per split, all using the same batch size and device.
train_iterator, val_iterator, test_iterator = BucketIterator.splits(
    (train, val, test),
    device=device,
    batch_size=BATCH_SIZE,
)

In [6]:
from models import GRU

# Vocabulary sizes are passed to the model as input_dim / output_dim.
INPUT_DIM, OUTPUT_DIM = len(SRC.vocab), len(TRG.vocab)

# Embedding width and dropout are the same on the encoder and decoder side;
# a single hidden size is shared by both.
ENC_EMB_DIM = DEC_EMB_DIM = 256
HID_DIM = 512
ENC_DROPOUT = DEC_DROPOUT = 0.5

# Build the GRU sequence-to-sequence model and move it to the selected device.
model1 = GRU.Seq2Seq(
    # encoder side
    input_dim=INPUT_DIM,
    enc_emb_dim=ENC_EMB_DIM,
    enc_dropout=ENC_DROPOUT,
    # decoder side
    output_dim=OUTPUT_DIM,
    dec_emb_dim=DEC_EMB_DIM,
    dec_dropout=DEC_DROPOUT,
    # shared
    hid_dim=HID_DIM,
    device=device,
)
model1 = model1.to(device)

In [1]:
# Loss: cross-entropy that skips target positions holding the padding index.
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

# Optimiser: Adam with weight decay over all parameters of the GRU model.
# NOTE(review): lr=0.03 is well above Adam's default of 1e-3 -- confirm it is intended.
optimizer = optim.Adam(
    model1.parameters(),
    lr=0.03,
    betas=(0.9, 0.99),
    weight_decay=0.0001,
)

# Bundle model, data iterators, optimiser and loss into the training helper.
# model_type is the empty string here; the other sections of this notebook
# pass 'Attention' and 'Transformer'.
train_GRU = TrainModel(
    model=model1,
    model_type='',
    train_iterator=train_iterator,
    val_iterator=val_iterator,
    criterion=criterion,
    optimizer=optimizer,
)

In [None]:
# Epoch count and clip value forwarded to TrainModel.epoch for the GRU model.
# 'vanilla-model.pt' is handed over as model_name (presumably the checkpoint
# file name -- confirm in models.TrainModel).
N_EPOCHS, CLIP = 10, 1
train_GRU.epoch(
    n_epochs=N_EPOCHS,
    clip=CLIP,
    model_name='vanilla-model.pt',
)

# Attention

In [4]:
# Source (French) field.  Unlike the GRU section, include_lengths=True is set
# so that source batches also carry the sentence lengths.
SRC = Field(
    tokenize=tokenize_fr,
    lower=True,
    include_lengths=True,
    init_token='<sos>',
    eos_token='<eos>',
)

# Target (English) field: lower-cased and wrapped in <sos>/<eos> markers.
TRG = Field(
    tokenize=tokenize_en,
    lower=True,
    init_token='<sos>',
    eos_token='<eos>',
)

# Reload the corpus splits so the examples are bound to the new fields.
train, val, test = TranslationDataset.splits(
    path=path_data,
    exts=('.fr', '.en'),
    fields=(SRC, TRG),
)

# Vocabularies are built from the training split only, with a minimum
# token frequency of 2.
SRC.build_vocab(train, min_freq=2)
TRG.build_vocab(train, min_freq=2)

# Prefer the GPU when PyTorch can see one, otherwise run on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Batches are sorted internally by source length (sort_within_batch with a
# length-based sort_key); presumably required by the attention encoder's
# handling of padded sequences -- confirm in models.BiGRUwithAttention.
train_iterator, val_iterator, test_iterator = BucketIterator.splits(
    (train, val, test),
    device=device,
    batch_size=BATCH_SIZE,
    sort_key=lambda example: len(example.src),
    sort_within_batch=True,
)

In [5]:
from models import BiGRUwithAttention

# Vocabulary sizes are passed to the model as input_dim / output_dim.
INPUT_DIM, OUTPUT_DIM = len(SRC.vocab), len(TRG.vocab)

# Encoder and decoder use the same embedding width, hidden size and dropout.
ENC_EMB_DIM = DEC_EMB_DIM = 256
ENC_HID_DIM = DEC_HID_DIM = 512
ENC_DROPOUT = DEC_DROPOUT = 0.5

# Index of the padding token in the source vocabulary, handed to the model
# as src_pad_idx.
SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]

# Build the attention-based sequence-to-sequence model and move it to the
# selected device.
model2 = BiGRUwithAttention.Seq2Seq(
    # encoder side
    input_dim=INPUT_DIM,
    enc_emb_dim=ENC_EMB_DIM,
    enc_hid_dim=ENC_HID_DIM,
    enc_dropout=ENC_DROPOUT,
    src_pad_idx=SRC_PAD_IDX,
    # decoder side
    output_dim=OUTPUT_DIM,
    dec_emb_dim=DEC_EMB_DIM,
    dec_hid_dim=DEC_HID_DIM,
    dec_dropout=DEC_DROPOUT,
    # shared
    device=device,
)
model2 = model2.to(device)

In [6]:
# Loss: cross-entropy that skips target positions holding the padding index.
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

# Optimiser: Adam with weight decay over all parameters of the attention model.
# NOTE(review): lr=0.03 is well above Adam's default of 1e-3 -- confirm it is intended.
optimizer = optim.Adam(
    model2.parameters(),
    lr=0.03,
    betas=(0.9, 0.99),
    weight_decay=0.0001,
)

# Bundle model, data iterators, optimiser and loss into the training helper,
# tagged with model_type='Attention'.
train_AttentionGRU = TrainModel(
    model=model2,
    model_type='Attention',
    train_iterator=train_iterator,
    val_iterator=val_iterator,
    criterion=criterion,
    optimizer=optimizer,
)

In [2]:
# Epoch count and clip value forwarded to TrainModel.epoch for the attention
# model.  'att-model.pt' is handed over as model_name (presumably the
# checkpoint file name -- confirm in models.TrainModel).
N_EPOCHS, CLIP = 10, 1
train_AttentionGRU.epoch(
    n_epochs=N_EPOCHS,
    clip=CLIP,
    model_name='att-model.pt',
)

# Transformer

In [None]:
# Source (French) field.  batch_first=True makes the batch dimension come
# first in the produced tensors.
SRC = Field(
    tokenize=tokenize_fr,
    lower=True,
    batch_first=True,
    init_token='<sos>',
    eos_token='<eos>',
)

# Target (English) field with the same settings and the English tokenizer.
TRG = Field(
    tokenize=tokenize_en,
    lower=True,
    batch_first=True,
    init_token='<sos>',
    eos_token='<eos>',
)

# Reload the corpus splits so the examples are bound to the new fields.
train, val, test = TranslationDataset.splits(
    path=path_data,
    exts=('.fr', '.en'),
    fields=(SRC, TRG),
)

# Vocabularies are built from the training split only, with a minimum
# token frequency of 2.
SRC.build_vocab(train, min_freq=2)
TRG.build_vocab(train, min_freq=2)

# One iterator per split.  `device` is not set in this cell; it is the value
# assigned by the earlier sections of the notebook.
train_iterator, val_iterator, test_iterator = BucketIterator.splits(
    (train, val, test),
    device=device,
    batch_size=BATCH_SIZE,
)

In [None]:
from models import Transformer

# Vocabulary sizes are passed to the model as input_dim / output_dim.
INPUT_DIM, OUTPUT_DIM = len(SRC.vocab), len(TRG.vocab)

# Encoder and decoder are configured symmetrically: hidden size, number of
# layers, number of heads, position-wise feed-forward size and dropout.
ENC_HID_DIM = DEC_HID_DIM = 256
ENC_LAYERS = DEC_LAYERS = 3
ENC_HEADS = DEC_HEADS = 8
ENC_PF_DIM = DEC_PF_DIM = 512
ENC_DROPOUT = DEC_DROPOUT = 0.1

# Padding-token indices of both vocabularies, handed to the model as
# src_pad_idx / trg_pad_idx.
SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

# Build the Transformer sequence-to-sequence model and move it to the
# selected device.
model3 = Transformer.Seq2Seq(
    # encoder side
    input_dim=INPUT_DIM,
    enc_hid_dim=ENC_HID_DIM,
    enc_layers=ENC_LAYERS,
    enc_heads=ENC_HEADS,
    enc_pf_dim=ENC_PF_DIM,
    enc_dropout=ENC_DROPOUT,
    src_pad_idx=SRC_PAD_IDX,
    # decoder side
    output_dim=OUTPUT_DIM,
    dec_hid_dim=DEC_HID_DIM,
    dec_layers=DEC_LAYERS,
    dec_heads=DEC_HEADS,
    dec_pf_dim=DEC_PF_DIM,
    dec_dropout=DEC_DROPOUT,
    trg_pad_idx=TRG_PAD_IDX,
    # shared
    device=device,
)
model3 = model3.to(device)

In [None]:
def init_weights(m):
    """Xavier-uniform initialise the ``weight`` tensor of a single module.

    Meant to be passed to ``nn.Module.apply``, which calls it once for every
    sub-module.  Modules that have no tensor-valued ``weight`` attribute, or
    whose weight has a single dimension, are left untouched.

    NOTE(review): ``init_weights`` was referenced by this cell but never
    defined or imported anywhere in the notebook, so the cell raised
    ``NameError`` on a fresh kernel.  Xavier-uniform was chosen as a common
    initialisation for Transformer models -- confirm it matches the scheme
    originally intended.
    """
    weight = getattr(m, 'weight', None)
    if isinstance(weight, torch.Tensor) and weight.dim() > 1:
        nn.init.xavier_uniform_(weight.data)


# Re-initialise the Transformer's weights before training.
model3.apply(init_weights)

# Optimiser: Adam with weight decay over all parameters of the Transformer.
# NOTE(review): lr=0.03 is well above Adam's default of 1e-3 -- confirm it is intended.
optimizer = optim.Adam(model3.parameters(), lr=0.03, weight_decay=0.0001, betas=(0.9, 0.99))

# Loss: cross-entropy that skips target positions holding the padding index.
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

# Bundle model, data iterators, optimiser and loss into the training helper,
# tagged with model_type='Transformer'.
train_Transformer = TrainModel(model=model3,
                               train_iterator=train_iterator,
                               val_iterator=val_iterator,
                               optimizer=optimizer,
                               criterion=criterion,
                               model_type='Transformer')

In [None]:
# Epoch count and clip value forwarded to TrainModel.epoch for the
# Transformer.  'transformer-model.pt' is handed over as model_name
# (presumably the checkpoint file name -- confirm in models.TrainModel).
N_EPOCHS, CLIP = 10, 1
train_Transformer.epoch(
    n_epochs=N_EPOCHS,
    clip=CLIP,
    model_name='transformer-model.pt',
)