In [None]:
import torch
import torch.optim as optim
from model.transformer import Transformer
from utils.checkpoint import Checkpoint
from utils.clock import Clock
from utils.dataset import Dataset
from utils.evaluator import Evaluator
from utils.search import Beam, Greedy
from utils.tokenizer import Tokenizer, DualTokenizer
from utils.quantize import quantize_model
from utils.functional import read_data, save_module, save_model, save_config, parameter_count, model_size, graph, parse_config
from utils.train import train

In [None]:
# Load the experiment settings from config.txt and expose each entry as a
# notebook-level name so later cells can refer to them directly.
config = parse_config("config.txt", verbose=True)

# Vocabulary / batching
min_freq = config["min_freq"]
maxlen = config["maxlen"]
batch_size = config["batch_size"]

# Transformer dimensions
dm = config["dm"]
dk = config["dk"]
dv = config["dv"]
nhead = config["nhead"]
layers = config["layers"]
dff = config["dff"]

# Remaining Transformer constructor options
bias = config["bias"]
dropout = config["dropout"]
eps = config["eps"]
scale = config["scale"]

# Adam optimizer
lr = config["lr"]
adam_eps = config["adam_eps"]
betas = config["betas"]
weight_decay = config["weight_decay"]

# ReduceLROnPlateau scheduler
factor = config["factor"]
patience = config["patience"]

# Beam / greedy search
beam_width = config["beam_width"]
alpha = config["alpha"]
search_eps = config["search_eps"]
fast = config["fast"]

# Evaluation and checkpointing
eval_batch_size = config["eval_batch_size"]
goal_bleu = config["goal_bleu"]
corpus_level = config["corpus_level"]
frequency = config["frequency"]
overwrite = config["overwrite"]

# Training loop
warmups = config["warmups"]
epochs = config["epochs"]
clip = config["clip"]

# Prefer the GPU whenever torch reports one is available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device}")

In [None]:
# One independent tokenizer per language; both start out untrained.
english, german = Tokenizer(), Tokenizer()

In [None]:
# English sentences are the model inputs, German sentences are the labels.
train_inputs = read_data(path="datasets/multi30k-train.en")
train_labels = read_data(path="datasets/multi30k-train.de")
test_inputs = read_data(path="datasets/multi30k-test.en")
test_labels = read_data(path="datasets/multi30k-test.de")

# Fit each tokenizer on the training split only, so the test split
# never influences the vocabulary.
english.train(train_inputs, min_freq=min_freq)
german.train(train_labels, min_freq=min_freq)

In [None]:
# Combine the English and German tokenizers into a single object, then wrap each
# split (inputs, labels) together with that tokenizer in a Dataset.
tokenizer = DualTokenizer(english, german)
trainset = Dataset(train_inputs, train_labels, tokenizer) 
testset = Dataset(test_inputs, test_labels, tokenizer)

In [None]:
# Tabular view of the training set; the trailing expression shows its first rows
# as the cell output (presumably a pandas DataFrame — confirm in utils.dataset).
trainframe = trainset.dataframe()
trainframe.head()

In [None]:
# Tabular view of the test set; the trailing expression shows its first rows
# as the cell output (presumably a pandas DataFrame — confirm in utils.dataset).
testframe = testset.dataframe()
testframe.head()

In [None]:
# Display the describe() summary of the training frame as the cell output.
trainframe.describe()

In [None]:
# Display the describe() summary of the test frame as the cell output.
testframe.describe()

In [None]:
# Vocabulary sizes for the source and target sides, in that order.
source_vocab, target_vocab = tokenizer.vocab_size()

# Ids of the special tokens, looked up in the source-side tokenizer.
# NOTE(review): sos/eos are later handed to the decoding search, which produces
# target-side tokens; this presumably relies on both tokenizers assigning the
# same ids to special tokens — confirm.
sos = tokenizer.getitem("<sos>", module="source")
eos = tokenizer.getitem("<eos>", module="source")
pad = tokenizer.getitem("<pad>", module="source")

# Report vocabulary sizes and sequence-length statistics for both splits.
print(f"Number of input tokens: {source_vocab}\nNumber of output tokens: {target_vocab}")
print(f"Average training sequence length: {trainset.avgseq()}\nLongest training sequence length: {trainset.maxseq()}")
print(f"Average testing sequence length: {testset.avgseq()}\nLongest testing sequence length: {testset.maxseq()}")

In [None]:
# Build the batched loaders for both splits.
# The training loader is shuffled so the optimizer does not see the samples in
# file order; the evaluation loader keeps a fixed order so that test loss and
# BLEU are computed over the same sequence of batches on every evaluation.
# (The flags were previously the other way around.)
dataloader = trainset.dataloader(maxlen=maxlen, batch_size=batch_size, shuffle=True, drop_last=False)
testloader = testset.dataloader(maxlen=maxlen, batch_size=eval_batch_size, shuffle=False, drop_last=False)
print(f"Maxlen: {maxlen}\nBatch Size: {batch_size}\nTest Batch Size: {eval_batch_size}")
print(f"Trainable Samples: {dataloader.size()}\nTestable Samples: {testloader.size()}")

In [None]:
# Persist everything needed to reproduce or resume this run under experiment/:
# the tokenizer, both data loaders, and a copy of the configuration file.
save_module(tokenizer, path="experiment/tokenizer.pt", verbose=True)
save_module(dataloader, path="experiment/dataloader.pt", verbose=True)
save_module(testloader, path="experiment/testloader.pt", verbose=True)
save_config(config_path="config.txt", path="experiment/config.txt", verbose=True)

In [None]:
# Build the model from the configured architecture and move it to the selected device.
model = Transformer(source_vocab, target_vocab, maxlen, pad_id=pad, dm=dm, dk=dk, dv=dv, nhead=nhead, layers=layers,
                    dff=dff, bias=bias, dropout=dropout, eps=eps, scale=scale)
model.to(device)

# Adam optimizer with a plateau-based learning-rate scheduler.
optimizer = optim.Adam(model.parameters(), lr=lr, betas=betas, weight_decay=weight_decay, eps=adam_eps)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=factor, patience=patience)

# Use the checkpoint frequency from the configuration instead of a hard-coded 1,
# so changing config.txt actually changes how often checkpoints are taken.
checkpoint = Checkpoint(model, optimizer, scheduler, frequency=frequency, path="experiment/checkpoint.pt", overwrite=overwrite)

# Decoding strategies; the evaluator is driven by beam search.
beam = Beam(sos, eos, maxlen, width=beam_width, alpha=alpha, eps=search_eps, fast=fast)
greedy = Greedy(sos, eos, maxlen, alpha=alpha, eps=search_eps)
evaluator = Evaluator(testloader, tokenizer, beam, goal_bleu=goal_bleu, corpus_level=corpus_level)
clock = Clock()
print(f"Number of Trainable Parameters: {parameter_count(model):.1f}M\nSize of Model: {model_size(model):.1f}MB")

In [None]:
# Run training and collect the per-run metric histories that train() returns.
# The epoch count comes from the configuration rather than a hard-coded 3, so the
# value in config.txt (also copied to experiment/config.txt) matches what was run.
losses, test_losses, bleus = train(dataloader, model, optimizer, scheduler, evaluator, checkpoint, clock, epochs=epochs,
                                   warmups=warmups, clip=clip, verbose=True, log="experiment/log.txt", device=device)

In [None]:
# Hand the training loss, test loss and BLEU histories to graph(); presumably it
# plots them and writes the figure to the given path — confirm in utils.functional.
graph(losses, test_losses, bleus, path="experiment/metrics.jpg")

In [None]:
# Derive two reduced-precision variants of the trained model (int8 and float16).
# inplace=False is passed, so `model` itself is presumably left unmodified.
# NOTE(review): if quantize_model wraps torch's dynamic quantization, it typically
# expects the model on CPU and in eval mode — confirm in utils.quantize.
model_int8 = quantize_model(model, dtype=torch.qint8, inplace=False)
model_float16 = quantize_model(model, dtype=torch.float16, inplace=False)

In [None]:
# Write the full-precision model and both quantized variants under experiment/.
save_model(model, path="experiment/model.pt", verbose=True)
save_model(model_int8, path="experiment/model_int8.pt", verbose=True)
save_model(model_float16, path="experiment/model_float16.pt", verbose=True)