In [1]:
from pipeline import Pipeline
from lang_pair import LangPair

from models.encoder import Encoder
from models.decoder import Decoder

from coach import Coach
import torch.optim as optim
import torch.nn as nn
import torch

In [None]:
# Select the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [2]:
# Load the two saved vocabulary artifacts (source side first, then target
# side); only the `.data` payload of each loaded pipeline is kept.
vi_vi_vocab = Pipeline.load("vi_vi_train_10_chars_10k_vocab").data
vi_en_vocab = Pipeline.load("vi_en_train_10_chars_10k_vocab").data

# The index artifact's `.data` unpacks into exactly two items:
# the source-side indices and the target-side indices.
(vi_vi_idxs, vi_en_idxs) = Pipeline.load("vi_idx_10chars_filter").data

In [3]:
# Bundle both index collections with their vocabularies' EOS indices.
# NOTE(review): positional order is (source idxs, source EOS, target idxs,
# target EOS) as written here — confirm against LangPair's signature.
vi_en_pair = LangPair(
    vi_vi_idxs,
    vi_vi_vocab.eos_idx,
    vi_en_idxs,
    vi_en_vocab.eos_idx,
    device=device,
)

In [4]:
# Shared hyperparameters, reused by the decoder config, both optimizers,
# and the training call further down.
batch_size = 40        # examples per batch
learning_rate = 0.1    # step size handed to the SGD optimizers

In [5]:
# Keyword arguments that are later unpacked into the Encoder constructor.
enc_params = dict(
    input_vocab_size=vi_vi_vocab.size,  # taken from the source-side vocabulary
    embedding_size=300,
    hidden_size=256,
    n_layers=1,
    dropout=0,
)

In [6]:
# Keyword arguments that are later unpacked into the Decoder constructor.
# The embedding/hidden/layer/dropout values match the encoder configuration.
dec_params = dict(
    target_vocab_size=vi_en_vocab.size,  # taken from the target-side vocabulary
    embedding_size=300,
    hidden_size=256,
    n_layers=1,
    dropout=0,
    batch_size=batch_size,  # same batch size that is passed to training
)

In [8]:
# Build each model from its parameter dict, then place it on the selected
# device. The result of `.to(device)` is what gets bound to the name.
encoder = Encoder(**enc_params)
encoder = encoder.to(device)

decoder = Decoder(**dec_params)
decoder = decoder.to(device)

In [9]:
# Negative log-likelihood criterion used as the training loss.
loss_fn = nn.NLLLoss()

# One plain SGD optimizer per model, both using the shared learning rate.
enc_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
dec_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

In [10]:
# Everything the Coach needs: the data pair, both models with their
# optimizers, and the loss function.
coach_params = dict(
    lang_pair=vi_en_pair,
    encoder=encoder,
    enc_optimizer=enc_optimizer,
    decoder=decoder,
    dec_optimizer=dec_optimizer,
    loss_fn=loss_fn,
)

# Instantiate the training driver from the collected arguments.
coach = Coach(**coach_params)

In [11]:
# Arguments for the training run; unpacked into `coach.train` below.
# NOTE(review): the recorded progress bar (1875 batches) suggests
# `iterations` counts examples, i.e. 75000 / batch_size — confirm in Coach.
training_params = dict(
    learning_rate=learning_rate,
    iterations=75000,
    print_interval=5000,
    batch_size=batch_size,
)

In [12]:
# Start training with the settings collected in `training_params`.
# The recorded output below shows a progress bar and a periodic average-loss
# line; the saved run was stopped manually (KeyboardInterrupt).
coach.train(**training_params)

Training Iterations:   7%|▋         | 125/1875 [03:39<43:02,  1.48s/batch]

Interval 1 (/125) average loss: 10.6206



Training Iterations:  12%|█▏        | 225/1875 [06:42<1:03:11,  2.30s/batch]

KeyboardInterrupt: 