In [33]:
%reload_ext autoreload
%autoreload 2

# test model loading and vocab loading
import torch
from transformer.transformer import Transformer
import torch.nn as nn
from trainer import Trainer
from data.translation_data import TranslationData
import sentencepiece as spm

# Test Loading a model, vocabulary and validation

In [34]:
# create device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# load best model
model_path = './checkpoints/en_fr_large_512_long/best_model.pt'
checkpoint = torch.load(model_path, map_location=device)

# load hypter parameters
args = checkpoint['args']

# get tokenizers form sentence piece
sp_tokenizer = spm.SentencePieceProcessor()
sp_tokenizer.load(args['sp_model_path'])

print(f"SP piece size ('vocab size'): {sp_tokenizer.get_piece_size()}")
model = Transformer(vocab_size=sp_tokenizer.get_piece_size(), d_model=args['d_model'], n_heads=args['n_heads'],
                       max_len=args['max_len'], dropout_rate = args['dropout_rate'],
                       hidden_ff_d=args['d_model']*4,
                       num_encoder_layers=args['num_layers'],
                       num_decoder_layers=args['num_layers'], encoding_type=args['encoding_type']).to(device=device)

# load dataset
data_module = TranslationData(src_lang='en', tgt_lang='fr', batch_size=args['batch_size'],
                              max_len=args['max_len'], tokenizer=sp_tokenizer, small_subset=True)
data_module.prepare_data()
# get validation loader
_, valid_loader, _ = data_module.get_dataloaders()

# create a trainer object for inference
loss_fn = nn.CrossEntropyLoss(ignore_index=data_module.special_tokens['<pad>'])
trainer = Trainer(model=model, val_loader=valid_loader, loss_fn=loss_fn, tokenizer=sp_tokenizer)
trainer.load_checkpoint(path=model_path)
# run validation only
val_loss, bleu_score = trainer.validate()
print(f"Val Loss: {val_loss:.04f} | BLEU Score: {bleu_score:.02f}")

SP piece size ('vocab size'): 16000
Loading dataset...
Small subset mode enabled for faster training
Data num_workers: 4
Data Loaders ready
Cuda available: True


                                                              

Val Loss: 2.0094 | BLEU Score: 23.64


# Test inference

In [70]:
# take a batch from the validation loader
#src_batch, tgt_batch = next(iter(valid_loader))

# send to device
#src_batch = src_batch.to(device)
# run inference
#src_sentences = trainer.decode_ids(id_sequences=src_batch,)
from scripts.infer import translate_sentences_non_batched
translated_sentences_beam, in_tokens, out_tokens, attention = translate_sentences_non_batched(trainer, sentences=['I want a hot dog'], decode_type='beam', beam_size=3,return_attention=True )

print(len(out_tokens[0]))
print(out_tokens)

# Print some translations
if False:
    for idx in range(5):  # first 5 examples+
        print(f'Source sentences: {src_sentences[idx]}')
        print(f"Predicted Translation Greedy: {translated_sentences[idx]}")
        print(f"Predicted Translation Beam: {translated_sentences_beam[idx]}")
        print("="*50)

Src shape:  torch.Size([1, 1, 1, 5])
Cross attention shape:  torch.Size([6, 3, 8, 1, 5])
Final step count:  21
Raw beam log probs: tensor([[-48.9237, -49.4311, -49.5963]], device='cuda:0')
Sequence lengths: tensor([[16., 17., 15.]], device='cuda:0')
Normalized scores: tensor([[-0.1911, -0.1710, -0.2204]], device='cuda:0')
Attention shape before going in:  22 ,  6 ,  torch.Size([3, 8, 1, 5])
Stacked dimension:  6 ,  torch.Size([3, 8, 22, 5])
final_attn.shape = torch.Size([1, 6, 8, 22, 5])
17
[['<s>', '▁Je', '▁veux', '▁un', '▁chien', '▁brû', 'le', '▁chaud', ',', '▁je', '▁veux', '▁une', '▁chaude', '▁hot', '▁j', '▁quand', '</s>']]
