In [10]:
%reload_ext autoreload
%autoreload 2

# test model loading and vocab loading
import torch
from transformer.transformer import Transformer
import torch.nn as nn
from trainer import Trainer
from data.translation_data import TranslationData

# Test loading a model, its vocabularies, and running validation

In [11]:
# Rebuild the trained Transformer from a saved checkpoint and run a single
# validation pass. The checkpoint provides the vocabularies and the
# hyperparameter dict ('args') that the model and data module are built from.

# create device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# load best model
model_path = './checkpoints/test_checkpoints_small/best_model.pt'
# map_location places the stored tensors on `device` regardless of where they were saved
checkpoint = torch.load(model_path, map_location=device)

# load vocabs
src_vocab = checkpoint['src_vocab']
target_vocab = checkpoint['target_vocab']
pad_id = src_vocab['<pad>']
# The loss below is presumably computed over target tokens (Trainer is not visible
# here), so the padding index to ignore must come from the target vocabulary.
target_pad_id = target_vocab['<pad>']

# load hyperparameters
args = checkpoint['args']
# Use the decoder depth stored in the checkpoint when present; older checkpoints
# that only recorded 'num_encoder_layers' fall back to the previous behaviour.
num_decoder_layers = args.get('num_decoder_layers', args['num_encoder_layers'])
model = Transformer(vocab_size=args['vocab_size'], d_model=args['d_model'], n_heads=args['n_heads'],
                       max_len=args['max_len'], dropout_rate = args['dropout_rate'],
                       encoding_type='sinusoidal', hidden_ff_d=args['d_model']*4,
                       num_encoder_layers=args['num_encoder_layers'],
                       num_decoder_layers=num_decoder_layers).to(device=device)

# load dataset
# NOTE(review): only src_vocab is handed to the data module; target_vocab from the
# checkpoint is not. Confirm the data module ends up with the same target vocabulary
# the model was trained with, otherwise the validation numbers are not meaningful.
data_module = TranslationData(batch_size=args['batch_size'],
                              src_vocab=src_vocab, max_vocab_size=args['vocab_size'],
                              max_len=args['max_len'])
data_module.prepare_data()
# get validation loader (the other two returned loaders are not needed here)
_, valid_loader, _ = data_module.get_dataloaders()

# create a trainer object for inference
loss_fn = nn.CrossEntropyLoss(ignore_index=target_pad_id)
trainer = Trainer(model=model, val_loader=valid_loader, loss_fn=loss_fn)
# NOTE(review): presumably this restores the trained weights into `model`; the
# checkpoint file is therefore read twice (once above for vocabs/args) - confirm.
trainer.load_checkpoint(path=model_path)
# run validation only
val_loss, bleu_score = trainer.validate()
print(f"Val Loss: {val_loss:.04f} | BLEU Score: {bleu_score:.02f}")

Loading dataset...
Building vocabularies from training data...


Map (num_proc=4): 100%|██████████| 232825/232825 [00:20<00:00, 11542.49 examples/s]


Vocab sizes: src = 10000,target = 10000
Data Loaders ready
Cuda available: True


                                                                       

Val Loss: 4.6319 | BLEU Score: 3.96


# Test inference

In [13]:
# Translate one validation batch with greedy and beam decoding and print a few
# source/translation pairs for a visual sanity check.
# NOTE(review): the output saved with this cell is
#   TypeError: Trainer.create_src_mask() missing 1 required positional argument: 'pad_token_id'
# It is raised somewhere under the trainer calls below; the Trainer source is not
# visible here, so the call to create_src_mask() has to be fixed there.

# take a batch from the validation loader
src_batch, target_batch = next(iter(valid_loader))

# send to device
src_batch = src_batch.to(device)
# not used in this cell; kept because later cells may rely on it
src_batch_cpu = src_batch.cpu().tolist()
# run inference
src_sentences = trainer.decode_ids(id_sequences=src_batch, id2word=trainer.id2word_src)
translated_sentences = trainer.infer(src=src_batch, id2word=trainer.id2word_target, type='greedy')
translated_sentences_beam = trainer.infer(src=src_batch, id2word=trainer.id2word_target, type='beam')

# Print some translations: up to the first 5 examples, bounded by the shortest
# result list so a small batch does not raise IndexError
num_to_show = min(5, len(src_sentences), len(translated_sentences), len(translated_sentences_beam))
for idx in range(num_to_show):
    print(f'Source sentences: {src_sentences[idx]}')
    print(f"Predicted Translation Greedy: {translated_sentences[idx]}")
    print(f"Predicted Translation Beam: {translated_sentences_beam[idx]}")
    print("="*50)

TypeError: Trainer.create_src_mask() missing 1 required positional argument: 'pad_token_id'