In [None]:
from pipeline import Pipeline
from lang_pair import LangPair

from models.encoder import Encoder
from models.decoder import Decoder
from models.attn import Attn

from coach import Coach
from translator import Translator

import torch.optim as optim
import torch.nn as nn
import torch

import pandas as pd
import io

In [1]:
from train import main

# Keyword arguments forwarded to train.main() for a scripted training run.
# Each key appears exactly once: a repeated key in a dict literal is silently
# overridden by its last occurrence, which hides typos and conflicting edits.
params = {
    "lang_pair_path": "vi_en_lang_pair.pkl",
    "hidden_size": 150,
    "batch_size": 25,
    "learning_rate": .1,
    "embed_size": 300,
    "enc_layers": 1,
    "dec_layers": 1,
    # NOTE(review): attention is disabled here although the save name below
    # contains "attn" — confirm the file name is the intended one.
    "use_attn": False,
    "save_filename": "model_attn_test",
    "print_interval": 2000,
    # The recorded progress bar for this cell shows max=800 (= 20000 / 25), so
    # "iterations" appears to count examples rather than batches — confirm.
    "iterations": 20000,
    "num_epochs": None
}

# Start training through the imported entry point, passing the params dict as
# keyword arguments. The output recorded for this cell ends in
# KeyboardInterrupt, i.e. this particular run was interrupted before finishing.
main(**params)

***************
Training w/o attention
***************

Fetching batches...



HBox(children=(IntProgress(value=0, description='Training Iterations', max=800), HTML(value='')))

KeyboardInterrupt: 

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
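# One-time setup, kept for reference: build the LangPair from the "vi_vi_train" and
# "vi_en_train" pipelines and cache it in transforms/vi_en_lang_pair.pkl, the file
# loaded in the next cell. Uncomment only to regenerate that cache.
# vi_vi_vocab, vi_en_vocab = Pipeline.load("vi_vi_train").data, Pipeline.load("vi_en_train").data
# vi_en_pair = LangPair(vi_vi_vocab, vi_en_vocab, device = device)
# with open("transforms/vi_en_lang_pair.pkl", "wb+") as f:
#     torch.save(vi_en_pair, f)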

In [4]:
# Load the cached language pair and its validation counterpart from disk.
# The files are opened read-only ("rb"): nothing is written here, and a
# read/write mode would fail on read-only files for no benefit.
# NOTE: torch.load unpickles arbitrary Python objects — only load files from a
# trusted source.
# NOTE(review): the first file lives under transforms/ while the validation file
# is read from the working directory — confirm both paths are intended.
with open("transforms/vi_en_lang_pair.pkl", "rb") as f:
    lang_pair = torch.load(f)
with open("vi_en_validation_lang_pair.pkl", "rb") as f:
    valid_lang_pair = torch.load(f)

In [5]:
# Hyperparameters for the hand-built model in the cells below.
# hidden_size feeds the encoder, decoder and attention configs; embed_size the encoder config.
hidden_size, embed_size = 100, 250
# batch_size goes into the training parameter dicts; learning_rate into those and the optimizers.
batch_size, learning_rate = 20, 0.1

In [6]:
# Constructor keyword arguments for the Encoder; the input vocabulary size is
# read from the first language of the loaded language pair.
enc_params = dict(
    input_vocab_size=lang_pair.lang1_vocab.size,
    hidden_size=hidden_size,
    n_layers=1,
    dropout=0,
    embed_size=embed_size,
)

In [7]:
# Constructor keyword arguments for the Decoder; the target vocabulary size is
# read from the second language of the loaded language pair.
dec_params = dict(
    target_vocab_size=lang_pair.lang2_vocab.size,
    hidden_size=hidden_size,
    n_layers=1,
    dropout=0,
)

In [8]:
# Constructor keyword arguments for the attention module.
attn_params = dict(hidden_size=hidden_size, method="general")

In [16]:
# Build each network from its config dict, then place it on the selected device.
# Construction order is unchanged: attention, encoder, plain decoder, attention decoder.
attn = Attn(**attn_params)
attn = attn.to(device)

encoder = Encoder(**enc_params)
encoder = encoder.to(device)

# Two decoders built from the same config: one without attention ...
decoder = Decoder(**dec_params)
decoder = decoder.to(device)

# ... and one that is handed the attention module created above.
decoder_attn = Decoder(attn=attn, **dec_params)
decoder_attn = decoder_attn.to(device)

In [17]:
# One SGD optimizer per network, all using the shared learning rate; they are
# created in the same order as before: encoder, decoder, attention decoder.
enc_optimizer, dec_optimizer, dec_attn_optimizer = (
    optim.SGD(net.parameters(), lr=learning_rate)
    for net in (encoder, decoder, decoder_attn)
)

# Negative log-likelihood loss, passed to both coaches.
loss_fn = nn.NLLLoss()

In [18]:
# Keyword arguments for the attention-free Coach.
coach_params = dict(
    lang_pair=lang_pair,
    encoder=encoder,
    enc_optimizer=enc_optimizer,
    decoder=decoder,
    dec_optimizer=dec_optimizer,
    loss_fn=loss_fn,
    device=device,
)

# Attention variant: identical settings with the decoder and its optimizer swapped.
# NOTE(review): encoder and enc_optimizer are the very same objects in both
# configurations, so the two coaches are given one shared encoder — confirm
# that this sharing is intended.
coach_attn_params = dict(
    coach_params,
    dec_optimizer=dec_attn_optimizer,
    decoder=decoder_attn,
)

coach = Coach(**coach_params)
coach_attn = Coach(**coach_attn_params)

In [19]:
# Settings passed to Coach.train_random(). The recorded run with these values
# shows 500 batches and 10 reporting intervals (10000 / 20 and 10000 / 1000), so
# "iterations" and "print_interval" appear to count examples — confirm.
rand_training_params = dict(
    learning_rate=learning_rate,
    iterations=10000,
    print_interval=1000,
    batch_size=batch_size,
)

# Alternative, epoch-based settings; defined here but not used by any visible cell.
epoch_training_params = dict(
    num_epochs=2,
    print_interval=5000,
    learning_rate=learning_rate,
    batch_size=batch_size,
    percent_of_data=1,
)

In [20]:
# Train the attention-free coach with the random-batch settings and keep what
# train_random returns, then serialise the whole Coach object to model_test.pkl.
losses = coach.train_random(**rand_training_params)
with open("model_test.pkl", "wb") as checkpoint_file:
    torch.save(coach, checkpoint_file)

Fetching batches...



Training Iterations:  10%|█         | 51/500 [01:45<15:41,  2.10s/ batch]

Interval (1/10) average loss: 10.1010


Training Iterations:  20%|██        | 101/500 [03:15<17:01,  2.56s/ batch]

Interval (2/10) average loss: 8.2698


Training Iterations:  30%|███       | 151/500 [04:43<06:32,  1.13s/ batch]

Interval (3/10) average loss: 7.3693


Training Iterations:  40%|████      | 201/500 [06:27<09:46,  1.96s/ batch]

Interval (4/10) average loss: 6.8652


Training Iterations:  50%|█████     | 251/500 [07:52<11:26,  2.76s/ batch]

Interval (5/10) average loss: 6.0817


Training Iterations:  60%|██████    | 301/500 [09:25<06:57,  2.10s/ batch]

Interval (6/10) average loss: 5.7378


Training Iterations:  70%|███████   | 351/500 [10:56<03:13,  1.30s/ batch]

Interval (7/10) average loss: 5.2971


Training Iterations:  80%|████████  | 401/500 [12:33<02:29,  1.51s/ batch]

Interval (8/10) average loss: 5.1179


Training Iterations:  90%|█████████ | 451/500 [14:00<01:15,  1.55s/ batch]

Interval (9/10) average loss: 4.9343


Training Iterations: 100%|██████████| 500/500 [15:53<00:00,  1.57s/ batch]


In [None]:
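# Disabled: attention-model counterpart of the training cell above. It would train
# coach_attn with the same random-batch settings and save it to model_attn_test.pkl.
# losses = coach.train_random(**rand_training_params)

# attn_losses, attns = coach_attn.train_random(**rand_training_params)
# with open("model_attn_test.pkl", "wb") as f:
#     torch.save(coach_attn, f)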

In [None]:
# Reading list kept as a bare string: an arXiv paper plus a gist and a GitHub
# repository (the repository name mentions "Self-Attentive"). Presumably
# background material for the attention module — confirm.
'''
https://arxiv.org/pdf/1703.03130.pdf
https://gist.github.com/tokestermw/eaa08f0637343ce55b022d9c5c73b872
https://github.com/flrngel/Self-Attentive-tensorflow/blob/master/model.py
'''
