In [None]:
from pathlib import Path
import torch
import torch.nn as nn
from config import get_config, latest_weights_file_path
from train import get_model, get_ds, run_validation
from torch.utils.tensorboard import SummaryWriter


In [None]:
# Register the TensorBoard notebook extension so the %tensorboard line magic
# becomes available in this kernel.
%load_ext tensorboard
# Launch TensorBoard inline, reading event files from the local "runs" directory.
%tensorboard --logdir runs

In [None]:
# Pick the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Build the data pipeline and the model from the project configuration.
# The model is sized from the vocabularies of the tokenizers returned by get_ds.
config = get_config()
train_dataloader, val_dataloader, tokenizer_src, tokenizer_tgt = get_ds(config)
model = get_model(config, tokenizer_src.get_vocab_size(), tokenizer_tgt.get_vocab_size()).to(device)

# Tensorboard: events are written under runs/training_model
writer = SummaryWriter("runs/training_model")

# Load the pretrained weights.
# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint saved from a CUDA run can still be loaded on a CPU-only machine
# (without it, torch.load tries to restore tensors to the device they were saved from).
model_filename = latest_weights_file_path(config)
state = torch.load(model_filename, map_location=device)
model.load_state_dict(state['model_state_dict'])

In [None]:
from datasets import load_dataset
from train import get_or_build_tokenizer
from dataset import BilingualDataset
from torch.utils.data import Dataset, DataLoader

In [None]:
# Load the held-out "test" split of the dataset named in the configuration.
test_raw = load_dataset(f"{config['datasource']}", f"{config['ds_config']}", split='test')

# Deliberately reuse tokenizer_src / tokenizer_tgt from the earlier get_ds(config)
# call instead of rebinding them here: the model's vocabulary sizes were taken
# from those exact tokenizers, so the test data must be encoded with them.
# NOTE(review): the previous version called get_or_build_tokenizer(config, test_raw, ...);
# if that helper ever builds a tokenizer from the dataset it is given, it would be
# fitted on test data and could disagree with the loaded model — confirm in train.py.
test_ds = BilingualDataset(test_raw, tokenizer_src, tokenizer_tgt, config['lang_src'], config['lang_tgt'], config['seq_len'])

# One example per batch, in dataset order.
test_dataloader = DataLoader(test_ds, batch_size=1, shuffle=False)

# Number of raw test examples; passed to run_validation as num_examples.
num = len(test_raw)

In [None]:
def print_msg(msg):
    """Forward a message emitted by run_validation to standard output."""
    print(msg)


# Evaluate the loaded model on the test dataloader.
# The literal 0 is the eighth positional argument — presumably a global step
# used for logging; TODO confirm against train.run_validation.
run_validation(
    model,
    test_dataloader,
    tokenizer_src,
    tokenizer_tgt,
    config['seq_len'],
    device,
    print_msg,
    0,
    writer,
    num_examples=num,
)