In [1]:
import pandas as pd
from itertools import islice
import torch
from torch.utils.data import DataLoader
import sys
sys.path.append(r"C:\Users\emman\Desktop\PROYECTOS_VS_CODE\PRUEBAS_DE_PYTHON\Chest-X-ray-Diagnosis-Automated-Reporting-using-CNNs-and-LLMs---UDEM-PEF-Thesis-Fall-2025")

from utils.text_metrics import evaluate_all_metrics
from utils.temp_utils import *
from utils.gpt_models import DinoGPTCaptioner, DinoGPT2Captioner
from utils.chexpert_dataset import CheXpertDataset
from utils.padchest_dataset import PadChestGRDataset

# Data

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

CSV_PATH = r"C:\Users\emman\Desktop\PROYECTOS_VS_CODE\PRUEBAS_DE_PYTHON\CheXpertPlus\df_chexpert_plus_240401.csv"
IMG_ROOT = r"C:\Users\emman\Desktop\PROYECTOS_VS_CODE\PRUEBAS_DE_PYTHON\CheXpertPlus\PNG"
TEXT_COL = "section_impression"
PATH_COL = "path_to_image"

IMG_SIZE = 224
MAX_LEN = 64
NUM_BATCH = 8

tf = dino_image_transform(img_size=IMG_SIZE)

ds_train = CheXpertDataset(img_root=IMG_ROOT, csv_path=CSV_PATH, split="train", transform=tf, text_col=TEXT_COL)
ds_valid = CheXpertDataset(img_root=IMG_ROOT, csv_path=CSV_PATH, split="valid", transform=tf, text_col=TEXT_COL)
ds_test = CheXpertDataset(img_root=IMG_ROOT, csv_path=CSV_PATH, split="test", transform=tf, text_col=TEXT_COL)

#labels = pd.read_csv(CSV_PATH)[TEXT_COL].tolist()

tokenizer = build_tokenizer_from_labels(captions=None)
pad_id = tokenizer.pad_token_id
eos_id = tokenizer.eos_token_id
bos_id = tokenizer.bos_token_id
collate_fn = CaptionCollate(tokenizer, pad_id)

train_loader = DataLoader(ds_train, batch_size=NUM_BATCH, shuffle=True, collate_fn=collate_fn)
valid_loader = DataLoader(ds_valid, batch_size=NUM_BATCH, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(ds_test, batch_size=NUM_BATCH, shuffle=False, collate_fn=collate_fn)

Using device: cuda
[INFO] Kept 47494/223462 rows with existing PNGs under C:\Users\emman\Desktop\PROYECTOS_VS_CODE\PRUEBAS_DE_PYTHON\CheXpertPlus\PNG
[INFO] Kept 47494/223462 rows with existing PNGs under C:\Users\emman\Desktop\PROYECTOS_VS_CODE\PRUEBAS_DE_PYTHON\CheXpertPlus\PNG
[INFO] Kept 47494/223462 rows with existing PNGs under C:\Users\emman\Desktop\PROYECTOS_VS_CODE\PRUEBAS_DE_PYTHON\CheXpertPlus\PNG


In [3]:
tokenizer_size = tokenizer.vocab_size
print("Tokenizer size:", tokenizer_size)

Tokenizer size: 58996


# Model

In [4]:
# DINO ViT-S/16 hidden size is 384 
EMBEDDING_D_IMG = 384
N_PREFIX = (IMG_SIZE // 16) ** 2  # number of visual prefix tokens (including CLS)

def pick_heads(d_model, target_head_dim=64):
    h = max(1, round(d_model / target_head_dim))
    while d_model % h != 0: h -= 1
    return h

D_MODEL = 768
N_HEAD = pick_heads(D_MODEL, 64)  # -> 12


model = DinoGPTCaptioner(
    vocab_size=tokenizer.vocab_size,
    d_img=EMBEDDING_D_IMG,
    pad_id=pad_id,
    d_model=D_MODEL,
    n_layer=12,
    n_head=N_HEAD,
    n_prefix=N_PREFIX,           # number of visual prefix tokens
    max_seq_len=512,
    dino_model_id="facebook/dinov3-vits16-pretrain-lvd1689m",
    freeze_dino=False,
).to(device)

# Print model parameters and trainable parameters
total_params = sum(p.numel() for p in model.parameters())
print(f"Total model parameters: {total_params / 1_000_000:.2f} Millions")

trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable model parameters: {trainable_params / 1_000_000:.2f} Millions")

# Print model footprint
model_footprint_in_gb = (total_params * 4) * (1e-9)  # assuming 4 bytes per parameter (float32)
print(f"Approximate model footprint: {model_footprint_in_gb:.2f} GB")

# after model init
model.decoder.lm_head.weight = model.decoder.tok_emb.weight  # weight tying

Total model parameters: 198.08 Millions
Trainable model parameters: 198.08 Millions
Approximate model footprint: 0.79 GB


# Train Parameters

In [None]:
optimizer = torch.optim.AdamW(
    filter(lambda p: p.requires_grad, model.parameters()), lr=3e-4, weight_decay=1e-2
)
loss = sequence_ce_loss
NUM_EPOCHS = 200
BATCHES_PER_EPOCH = 10

# Training

In [6]:
for epoch in range(NUM_EPOCHS):
    slice_train_loader = islice(train_loader, BATCHES_PER_EPOCH)
    slice_valid_loader = islice(valid_loader, BATCHES_PER_EPOCH)
    train_stats = train_one_epoch(model, slice_train_loader, optimizer, device, pad_id, num_batches=BATCHES_PER_EPOCH, loss_fn=loss, grad_clip=1.0)
    val_stats = evaluate(model, slice_valid_loader, device, pad_id, num_batches=BATCHES_PER_EPOCH, loss_fn=loss)
    print(f"Epoch {epoch + 1}: Train Loss={train_stats['loss']:.4f}, PPL={train_stats['ppl']:.2f} | "
            f"Val Loss={val_stats['val_loss']:.4f}, Val PPL={val_stats['val_ppl']:.2f}")

  with torch.cuda.amp.autocast(dtype=torch.bfloat16):
Training: 100%|██████████| 10/10 [00:05<00:00,  1.80it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.30it/s]


Epoch 1: Train Loss=9.5798, PPL=19936.68 | Val Loss=8.4576, Val PPL=4758.16


Training: 100%|██████████| 10/10 [00:04<00:00,  2.00it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.34it/s]


Epoch 2: Train Loss=7.8225, PPL=2647.57 | Val Loss=7.4969, Val PPL=1844.71


Training: 100%|██████████| 10/10 [00:05<00:00,  1.97it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.35it/s]


Epoch 3: Train Loss=7.4266, PPL=1730.32 | Val Loss=7.4413, Val PPL=1742.94


Training: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.34it/s]


Epoch 4: Train Loss=7.3112, PPL=1527.86 | Val Loss=7.5721, Val PPL=2005.55


Training: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.33it/s]


Epoch 5: Train Loss=7.2470, PPL=1419.18 | Val Loss=7.0750, Val PPL=1210.67


Training: 100%|██████████| 10/10 [00:04<00:00,  2.04it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.35it/s]


Epoch 6: Train Loss=7.0630, PPL=1211.02 | Val Loss=6.7899, Val PPL=915.41


Training: 100%|██████████| 10/10 [00:05<00:00,  1.98it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.35it/s]


Epoch 7: Train Loss=6.7463, PPL=858.29 | Val Loss=6.5497, Val PPL=721.22


Training: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.33it/s]


Epoch 8: Train Loss=6.4648, PPL=682.88 | Val Loss=6.3127, Val PPL=574.36


Training: 100%|██████████| 10/10 [00:05<00:00,  2.00it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.33it/s]


Epoch 9: Train Loss=6.2969, PPL=562.31 | Val Loss=6.1346, Val PPL=481.79


Training: 100%|██████████| 10/10 [00:04<00:00,  2.02it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.34it/s]


Epoch 10: Train Loss=6.1708, PPL=479.58 | Val Loss=5.9760, Val PPL=414.08


Training: 100%|██████████| 10/10 [00:04<00:00,  2.08it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.36it/s]


Epoch 11: Train Loss=6.0095, PPL=447.64 | Val Loss=5.8656, Val PPL=373.51


Training: 100%|██████████| 10/10 [00:04<00:00,  2.00it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.31it/s]


Epoch 12: Train Loss=5.7609, PPL=323.50 | Val Loss=5.7291, Val PPL=323.87


Training: 100%|██████████| 10/10 [00:04<00:00,  2.07it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.33it/s]


Epoch 13: Train Loss=5.6741, PPL=301.85 | Val Loss=5.6029, Val PPL=288.36


Training: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.34it/s]


Epoch 14: Train Loss=5.5789, PPL=266.14 | Val Loss=5.5300, Val PPL=268.54


Training: 100%|██████████| 10/10 [00:04<00:00,  2.05it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.32it/s]


Epoch 15: Train Loss=5.6686, PPL=301.70 | Val Loss=5.4352, Val PPL=244.32


Training: 100%|██████████| 10/10 [00:04<00:00,  2.07it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.36it/s]


Epoch 16: Train Loss=5.5479, PPL=262.75 | Val Loss=5.3857, Val PPL=230.08


Training: 100%|██████████| 10/10 [00:04<00:00,  2.01it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.35it/s]


Epoch 17: Train Loss=5.2848, PPL=207.37 | Val Loss=5.3198, Val PPL=218.35


Training: 100%|██████████| 10/10 [00:04<00:00,  2.07it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.35it/s]


Epoch 18: Train Loss=5.3810, PPL=219.95 | Val Loss=5.2873, Val PPL=208.90


Training: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.32it/s]


Epoch 19: Train Loss=5.2345, PPL=190.87 | Val Loss=5.2455, Val PPL=201.76


Training: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.34it/s]


Epoch 20: Train Loss=5.3850, PPL=231.54 | Val Loss=5.1912, Val PPL=191.32


Training: 100%|██████████| 10/10 [00:04<00:00,  2.04it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.35it/s]


Epoch 21: Train Loss=5.1638, PPL=177.38 | Val Loss=5.1339, Val PPL=180.34


Training: 100%|██████████| 10/10 [00:04<00:00,  2.04it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.34it/s]


Epoch 22: Train Loss=5.1749, PPL=179.63 | Val Loss=5.1274, Val PPL=178.79


Training: 100%|██████████| 10/10 [00:04<00:00,  2.03it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.34it/s]


Epoch 23: Train Loss=5.2785, PPL=202.52 | Val Loss=5.1036, Val PPL=174.90


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.32it/s]


Epoch 24: Train Loss=5.1377, PPL=185.36 | Val Loss=5.1035, Val PPL=173.78


Training: 100%|██████████| 10/10 [00:04<00:00,  2.08it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.25it/s]


Epoch 25: Train Loss=5.2003, PPL=183.72 | Val Loss=5.0798, Val PPL=167.98


Training: 100%|██████████| 10/10 [00:04<00:00,  2.04it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.25it/s]


Epoch 26: Train Loss=5.2012, PPL=188.16 | Val Loss=5.0377, Val PPL=161.17


Training: 100%|██████████| 10/10 [00:04<00:00,  2.00it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 27: Train Loss=5.0519, PPL=163.59 | Val Loss=5.0242, Val PPL=159.56


Training: 100%|██████████| 10/10 [00:05<00:00,  1.80it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]


Epoch 28: Train Loss=5.0279, PPL=155.45 | Val Loss=4.9903, Val PPL=154.39


Training: 100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 29: Train Loss=5.1046, PPL=175.42 | Val Loss=4.9811, Val PPL=152.11


Training: 100%|██████████| 10/10 [00:05<00:00,  1.84it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 30: Train Loss=5.0151, PPL=156.35 | Val Loss=4.9900, Val PPL=153.63


Training: 100%|██████████| 10/10 [00:05<00:00,  1.81it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 31: Train Loss=5.0030, PPL=151.73 | Val Loss=4.9532, Val PPL=148.33


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]


Epoch 32: Train Loss=5.0881, PPL=163.60 | Val Loss=4.9320, Val PPL=145.49


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 33: Train Loss=5.0855, PPL=170.06 | Val Loss=4.9149, Val PPL=142.91


Training: 100%|██████████| 10/10 [00:05<00:00,  1.80it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]


Epoch 34: Train Loss=4.9621, PPL=150.35 | Val Loss=4.8912, Val PPL=138.84


Training: 100%|██████████| 10/10 [00:05<00:00,  1.84it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 35: Train Loss=4.7106, PPL=112.38 | Val Loss=4.8649, Val PPL=135.30


Training: 100%|██████████| 10/10 [00:05<00:00,  1.75it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 36: Train Loss=4.8714, PPL=134.24 | Val Loss=4.8554, Val PPL=135.22


Training: 100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 37: Train Loss=4.8464, PPL=130.90 | Val Loss=4.8615, Val PPL=135.50


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 38: Train Loss=5.0476, PPL=161.24 | Val Loss=4.8486, Val PPL=133.32


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 39: Train Loss=4.8549, PPL=129.01 | Val Loss=4.8356, Val PPL=131.39


Training: 100%|██████████| 10/10 [00:05<00:00,  1.96it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 40: Train Loss=4.8891, PPL=135.26 | Val Loss=4.8516, Val PPL=133.65


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 41: Train Loss=4.8684, PPL=131.99 | Val Loss=4.8129, Val PPL=129.48


Training: 100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 42: Train Loss=4.8605, PPL=132.77 | Val Loss=4.8069, Val PPL=127.84


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 43: Train Loss=4.7377, PPL=117.87 | Val Loss=4.7974, Val PPL=126.54


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 44: Train Loss=4.8524, PPL=131.87 | Val Loss=4.7895, Val PPL=125.47


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 45: Train Loss=4.9836, PPL=151.08 | Val Loss=4.7641, Val PPL=122.69


Training: 100%|██████████| 10/10 [00:05<00:00,  1.84it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 46: Train Loss=4.9038, PPL=138.36 | Val Loss=4.7383, Val PPL=119.16


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 47: Train Loss=4.9543, PPL=152.05 | Val Loss=4.7441, Val PPL=119.66


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 48: Train Loss=4.8480, PPL=129.99 | Val Loss=4.7578, Val PPL=121.31


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 49: Train Loss=4.8209, PPL=130.61 | Val Loss=4.7367, Val PPL=118.61


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 50: Train Loss=4.8310, PPL=128.41 | Val Loss=4.7282, Val PPL=118.20


Training: 100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 51: Train Loss=4.7293, PPL=117.03 | Val Loss=4.7288, Val PPL=117.98


Training: 100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 52: Train Loss=4.7577, PPL=118.67 | Val Loss=4.7253, Val PPL=117.79


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 53: Train Loss=4.8927, PPL=138.62 | Val Loss=4.7121, Val PPL=116.16


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 54: Train Loss=4.8134, PPL=125.70 | Val Loss=4.7035, Val PPL=115.70


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 55: Train Loss=4.7340, PPL=114.70 | Val Loss=4.6941, Val PPL=114.58


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 56: Train Loss=4.6950, PPL=111.26 | Val Loss=4.6982, Val PPL=114.72


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 57: Train Loss=4.7256, PPL=115.60 | Val Loss=4.6921, Val PPL=113.22


Training: 100%|██████████| 10/10 [00:05<00:00,  1.79it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 58: Train Loss=4.7343, PPL=118.12 | Val Loss=4.6705, Val PPL=111.49


Training: 100%|██████████| 10/10 [00:05<00:00,  1.80it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 59: Train Loss=4.8389, PPL=129.33 | Val Loss=4.6507, Val PPL=109.44


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 60: Train Loss=4.8045, PPL=127.68 | Val Loss=4.6445, Val PPL=107.85


Training: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]


Epoch 61: Train Loss=4.6480, PPL=105.90 | Val Loss=4.6388, Val PPL=107.69


Training: 100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 62: Train Loss=4.7307, PPL=116.65 | Val Loss=4.6395, Val PPL=107.64


Training: 100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 63: Train Loss=4.7332, PPL=118.76 | Val Loss=4.6650, Val PPL=110.09


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]


Epoch 64: Train Loss=4.7900, PPL=124.10 | Val Loss=4.6368, Val PPL=107.66


Training: 100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 65: Train Loss=4.6358, PPL=108.64 | Val Loss=4.6324, Val PPL=106.95


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.27it/s]


Epoch 66: Train Loss=4.5790, PPL=99.97 | Val Loss=4.6252, Val PPL=106.33


Training: 100%|██████████| 10/10 [00:05<00:00,  1.98it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.22it/s]


Epoch 67: Train Loss=4.6884, PPL=110.18 | Val Loss=4.6169, Val PPL=105.14


Training: 100%|██████████| 10/10 [00:05<00:00,  1.84it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.23it/s]


Epoch 68: Train Loss=4.7989, PPL=124.74 | Val Loss=4.6193, Val PPL=105.50


Training: 100%|██████████| 10/10 [00:05<00:00,  1.93it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 69: Train Loss=4.6842, PPL=113.31 | Val Loss=4.6205, Val PPL=105.44


Training: 100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.21it/s]


Epoch 70: Train Loss=4.6804, PPL=115.07 | Val Loss=4.5953, Val PPL=102.93


Training: 100%|██████████| 10/10 [00:04<00:00,  2.01it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 71: Train Loss=4.7743, PPL=121.23 | Val Loss=4.6089, Val PPL=104.72


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.24it/s]


Epoch 72: Train Loss=4.6668, PPL=106.00 | Val Loss=4.5943, Val PPL=103.07


Training: 100%|██████████| 10/10 [00:05<00:00,  1.98it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.21it/s]


Epoch 73: Train Loss=4.5976, PPL=101.05 | Val Loss=4.6123, Val PPL=104.67


Training: 100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.18it/s]


Epoch 74: Train Loss=4.5966, PPL=102.44 | Val Loss=4.6013, Val PPL=103.51


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.22it/s]


Epoch 75: Train Loss=4.6178, PPL=102.80 | Val Loss=4.5979, Val PPL=103.28


Training: 100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 76: Train Loss=4.6077, PPL=103.31 | Val Loss=4.5945, Val PPL=102.74


Training: 100%|██████████| 10/10 [00:05<00:00,  1.97it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 77: Train Loss=4.6079, PPL=102.03 | Val Loss=4.5709, Val PPL=100.41


Training: 100%|██████████| 10/10 [00:05<00:00,  1.93it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.22it/s]


Epoch 78: Train Loss=4.7776, PPL=122.07 | Val Loss=4.5810, Val PPL=101.15


Training: 100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.23it/s]


Epoch 79: Train Loss=4.6537, PPL=108.12 | Val Loss=4.5618, Val PPL=99.45


Training: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 80: Train Loss=4.6749, PPL=109.71 | Val Loss=4.5627, Val PPL=99.39


Training: 100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.25it/s]


Epoch 81: Train Loss=4.5977, PPL=103.77 | Val Loss=4.5682, Val PPL=100.47


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.24it/s]


Epoch 82: Train Loss=4.6814, PPL=109.87 | Val Loss=4.5688, Val PPL=100.59


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.20it/s]


Epoch 83: Train Loss=4.6447, PPL=106.95 | Val Loss=4.5536, Val PPL=98.51


Training: 100%|██████████| 10/10 [00:05<00:00,  1.95it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.22it/s]


Epoch 84: Train Loss=4.5532, PPL=96.19 | Val Loss=4.5610, Val PPL=99.09


Training: 100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.23it/s]


Epoch 85: Train Loss=4.6496, PPL=107.33 | Val Loss=4.5513, Val PPL=98.14


Training: 100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.22it/s]


Epoch 86: Train Loss=4.7092, PPL=115.32 | Val Loss=4.5661, Val PPL=100.12


Training: 100%|██████████| 10/10 [00:05<00:00,  1.95it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 87: Train Loss=4.6360, PPL=104.07 | Val Loss=4.5454, Val PPL=97.90


Training: 100%|██████████| 10/10 [00:05<00:00,  1.84it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.22it/s]


Epoch 88: Train Loss=4.5677, PPL=99.25 | Val Loss=4.5422, Val PPL=97.20


Training: 100%|██████████| 10/10 [00:05<00:00,  1.96it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.23it/s]


Epoch 89: Train Loss=4.7602, PPL=120.77 | Val Loss=4.5493, Val PPL=97.88


Training: 100%|██████████| 10/10 [00:05<00:00,  1.93it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 90: Train Loss=4.5472, PPL=97.40 | Val Loss=4.5509, Val PPL=97.93


Training: 100%|██████████| 10/10 [00:05<00:00,  1.95it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.25it/s]


Epoch 91: Train Loss=4.6563, PPL=107.00 | Val Loss=4.5312, Val PPL=96.09


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.22it/s]


Epoch 92: Train Loss=4.4901, PPL=90.54 | Val Loss=4.5236, Val PPL=95.67


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 93: Train Loss=4.5523, PPL=96.01 | Val Loss=4.5203, Val PPL=95.12


Training: 100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 94: Train Loss=4.5459, PPL=97.81 | Val Loss=4.5216, Val PPL=94.89


Training: 100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 95: Train Loss=4.5672, PPL=98.63 | Val Loss=4.5052, Val PPL=93.64


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 96: Train Loss=4.6258, PPL=104.87 | Val Loss=4.5103, Val PPL=93.85


Training: 100%|██████████| 10/10 [00:05<00:00,  1.75it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 97: Train Loss=4.5663, PPL=104.20 | Val Loss=4.5097, Val PPL=94.52


Training: 100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 98: Train Loss=4.6384, PPL=104.66 | Val Loss=4.5330, Val PPL=96.64


Training: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 99: Train Loss=4.6035, PPL=102.01 | Val Loss=4.5295, Val PPL=95.99


Training: 100%|██████████| 10/10 [00:05<00:00,  1.97it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]


Epoch 100: Train Loss=4.6113, PPL=104.34 | Val Loss=4.5113, Val PPL=94.30


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 101: Train Loss=4.6589, PPL=107.27 | Val Loss=4.5136, Val PPL=94.77


Training: 100%|██████████| 10/10 [00:05<00:00,  1.81it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 102: Train Loss=4.5618, PPL=97.72 | Val Loss=4.5181, Val PPL=94.93


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 103: Train Loss=4.5046, PPL=92.15 | Val Loss=4.5138, Val PPL=94.71


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 104: Train Loss=4.5643, PPL=98.95 | Val Loss=4.5077, Val PPL=94.00


Training: 100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 105: Train Loss=4.5120, PPL=92.32 | Val Loss=4.5087, Val PPL=93.86


Training: 100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 106: Train Loss=4.5473, PPL=96.91 | Val Loss=4.5107, Val PPL=94.27


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 107: Train Loss=4.5854, PPL=101.10 | Val Loss=4.4972, Val PPL=92.69


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 108: Train Loss=4.4772, PPL=89.65 | Val Loss=4.5061, Val PPL=94.05


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 109: Train Loss=4.6748, PPL=110.60 | Val Loss=4.5137, Val PPL=94.37


Training: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 110: Train Loss=4.5236, PPL=93.63 | Val Loss=4.5018, Val PPL=93.05


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 111: Train Loss=4.6052, PPL=103.01 | Val Loss=4.4795, Val PPL=91.23


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 112: Train Loss=4.5821, PPL=99.34 | Val Loss=4.4708, Val PPL=90.32


Training: 100%|██████████| 10/10 [00:05<00:00,  1.79it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 113: Train Loss=4.4011, PPL=84.34 | Val Loss=4.4777, Val PPL=90.60


Training: 100%|██████████| 10/10 [00:05<00:00,  1.93it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 114: Train Loss=4.5654, PPL=97.25 | Val Loss=4.5057, Val PPL=93.38


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]


Epoch 115: Train Loss=4.5631, PPL=98.76 | Val Loss=4.5049, Val PPL=93.18


Training: 100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 116: Train Loss=4.4946, PPL=91.15 | Val Loss=4.5026, Val PPL=93.00


Training: 100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 117: Train Loss=4.6145, PPL=104.38 | Val Loss=4.4934, Val PPL=92.31


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 118: Train Loss=4.5833, PPL=100.50 | Val Loss=4.4927, Val PPL=92.21


Training: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 119: Train Loss=4.4306, PPL=86.69 | Val Loss=4.4720, Val PPL=90.18


Training: 100%|██████████| 10/10 [00:05<00:00,  1.81it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 120: Train Loss=4.5219, PPL=94.61 | Val Loss=4.4767, Val PPL=90.48


Training: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]


Epoch 121: Train Loss=4.4756, PPL=89.94 | Val Loss=4.4671, Val PPL=89.89


Training: 100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 122: Train Loss=4.4374, PPL=86.36 | Val Loss=4.4709, Val PPL=90.33


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 123: Train Loss=4.4499, PPL=87.04 | Val Loss=4.4736, Val PPL=90.41


Training: 100%|██████████| 10/10 [00:05<00:00,  1.76it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 124: Train Loss=4.3835, PPL=81.14 | Val Loss=4.4849, Val PPL=91.40


Training: 100%|██████████| 10/10 [00:05<00:00,  1.94it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 125: Train Loss=4.5794, PPL=100.70 | Val Loss=4.4584, Val PPL=89.18


Training: 100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.23it/s]


Epoch 126: Train Loss=4.5126, PPL=93.74 | Val Loss=4.4779, Val PPL=90.96


Training: 100%|██████████| 10/10 [00:05<00:00,  1.94it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.22it/s]


Epoch 127: Train Loss=4.4839, PPL=90.77 | Val Loss=4.4520, Val PPL=88.91


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.20it/s]


Epoch 128: Train Loss=4.4962, PPL=90.88 | Val Loss=4.4527, Val PPL=89.00


Training: 100%|██████████| 10/10 [00:04<00:00,  2.01it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.20it/s]


Epoch 129: Train Loss=4.5113, PPL=93.07 | Val Loss=4.4621, Val PPL=89.55


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.23it/s]


Epoch 130: Train Loss=4.5420, PPL=96.03 | Val Loss=4.4418, Val PPL=87.78


Training: 100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.25it/s]


Epoch 131: Train Loss=4.5819, PPL=99.28 | Val Loss=4.4347, Val PPL=86.98


Training: 100%|██████████| 10/10 [00:05<00:00,  1.95it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.28it/s]


Epoch 132: Train Loss=4.5638, PPL=97.68 | Val Loss=4.4594, Val PPL=89.22


Training: 100%|██████████| 10/10 [00:05<00:00,  1.97it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.24it/s]


Epoch 133: Train Loss=4.4925, PPL=91.94 | Val Loss=4.4571, Val PPL=89.13


Training: 100%|██████████| 10/10 [00:05<00:00,  1.99it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.22it/s]


Epoch 134: Train Loss=4.3740, PPL=81.94 | Val Loss=4.4542, Val PPL=88.95


Training: 100%|██████████| 10/10 [00:05<00:00,  1.96it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.23it/s]


Epoch 135: Train Loss=4.5567, PPL=98.26 | Val Loss=4.4457, Val PPL=88.15


Training: 100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.20it/s]


Epoch 136: Train Loss=4.4728, PPL=89.39 | Val Loss=4.4514, Val PPL=88.93


Training: 100%|██████████| 10/10 [00:04<00:00,  2.03it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.26it/s]


Epoch 137: Train Loss=4.5152, PPL=92.72 | Val Loss=4.4557, Val PPL=88.92


Training: 100%|██████████| 10/10 [00:05<00:00,  1.96it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.25it/s]


Epoch 138: Train Loss=4.4780, PPL=90.47 | Val Loss=4.4464, Val PPL=88.37


Training: 100%|██████████| 10/10 [00:05<00:00,  1.94it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.22it/s]


Epoch 139: Train Loss=4.4292, PPL=84.89 | Val Loss=4.4534, Val PPL=88.63


Training: 100%|██████████| 10/10 [00:04<00:00,  2.04it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.24it/s]


Epoch 140: Train Loss=4.3363, PPL=77.54 | Val Loss=4.4440, Val PPL=87.91


Training: 100%|██████████| 10/10 [00:05<00:00,  1.96it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.24it/s]


Epoch 141: Train Loss=4.4620, PPL=87.57 | Val Loss=4.4316, Val PPL=86.77


Training: 100%|██████████| 10/10 [00:05<00:00,  1.97it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.31it/s]


Epoch 142: Train Loss=4.4975, PPL=92.88 | Val Loss=4.4274, Val PPL=86.39


Training: 100%|██████████| 10/10 [00:05<00:00,  1.98it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.25it/s]


Epoch 143: Train Loss=4.5262, PPL=94.18 | Val Loss=4.4277, Val PPL=86.17


Training: 100%|██████████| 10/10 [00:05<00:00,  1.96it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.24it/s]


Epoch 144: Train Loss=4.4829, PPL=90.55 | Val Loss=4.4345, Val PPL=86.76


Training: 100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.23it/s]


Epoch 145: Train Loss=4.6764, PPL=111.86 | Val Loss=4.4303, Val PPL=86.46


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 146: Train Loss=4.3939, PPL=81.70 | Val Loss=4.4270, Val PPL=86.40


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.23it/s]


Epoch 147: Train Loss=4.5752, PPL=99.11 | Val Loss=4.4249, Val PPL=86.02


Training: 100%|██████████| 10/10 [00:05<00:00,  1.98it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.32it/s]


Epoch 148: Train Loss=4.4383, PPL=85.68 | Val Loss=4.4231, Val PPL=86.07


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 149: Train Loss=4.4270, PPL=85.30 | Val Loss=4.4288, Val PPL=86.64


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 150: Train Loss=4.4127, PPL=84.28 | Val Loss=4.4363, Val PPL=87.40


Training: 100%|██████████| 10/10 [00:05<00:00,  1.95it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 151: Train Loss=4.4845, PPL=91.68 | Val Loss=4.4305, Val PPL=86.79


Training: 100%|██████████| 10/10 [00:05<00:00,  1.93it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.18it/s]


Epoch 152: Train Loss=4.5638, PPL=97.75 | Val Loss=4.4137, Val PPL=85.27


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 153: Train Loss=4.4306, PPL=84.93 | Val Loss=4.4327, Val PPL=87.10


Training: 100%|██████████| 10/10 [00:05<00:00,  1.93it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 154: Train Loss=4.5002, PPL=93.41 | Val Loss=4.4249, Val PPL=86.28


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 155: Train Loss=4.4977, PPL=92.60 | Val Loss=4.4328, Val PPL=87.04


Training: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 156: Train Loss=4.4767, PPL=89.85 | Val Loss=4.4338, Val PPL=87.33


Training: 100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.23it/s]


Epoch 157: Train Loss=4.5009, PPL=91.48 | Val Loss=4.4349, Val PPL=87.33


Training: 100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 158: Train Loss=4.5391, PPL=94.62 | Val Loss=4.4285, Val PPL=86.98


Training: 100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 159: Train Loss=4.4380, PPL=85.88 | Val Loss=4.4213, Val PPL=86.28


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 160: Train Loss=4.2802, PPL=72.67 | Val Loss=4.4226, Val PPL=86.29


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 161: Train Loss=4.4912, PPL=94.80 | Val Loss=4.4241, Val PPL=86.33


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 162: Train Loss=4.5403, PPL=95.42 | Val Loss=4.4262, Val PPL=86.68


Training: 100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 163: Train Loss=4.4010, PPL=83.57 | Val Loss=4.4459, Val PPL=88.39


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 164: Train Loss=4.2822, PPL=73.81 | Val Loss=4.4309, Val PPL=87.06


Training: 100%|██████████| 10/10 [00:05<00:00,  1.84it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 165: Train Loss=4.4972, PPL=93.56 | Val Loss=4.4109, Val PPL=85.02


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 166: Train Loss=4.5567, PPL=96.30 | Val Loss=4.4078, Val PPL=84.80


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 167: Train Loss=4.4529, PPL=87.61 | Val Loss=4.4012, Val PPL=84.22


Training: 100%|██████████| 10/10 [00:05<00:00,  1.94it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 168: Train Loss=4.4568, PPL=86.93 | Val Loss=4.4195, Val PPL=86.25


Training: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 169: Train Loss=4.4856, PPL=90.36 | Val Loss=4.4087, Val PPL=85.25


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 170: Train Loss=4.4329, PPL=85.58 | Val Loss=4.4080, Val PPL=84.83


Training: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 171: Train Loss=4.4135, PPL=85.21 | Val Loss=4.4242, Val PPL=86.32


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.18it/s]


Epoch 172: Train Loss=4.4984, PPL=90.48 | Val Loss=4.4150, Val PPL=85.51


Training: 100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 173: Train Loss=4.4310, PPL=86.52 | Val Loss=4.4119, Val PPL=85.31


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.20it/s]


Epoch 174: Train Loss=4.4741, PPL=89.22 | Val Loss=4.4178, Val PPL=86.08


Training: 100%|██████████| 10/10 [00:05<00:00,  1.93it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.20it/s]


Epoch 175: Train Loss=4.4776, PPL=89.32 | Val Loss=4.4065, Val PPL=84.76


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 176: Train Loss=4.4341, PPL=86.92 | Val Loss=4.4085, Val PPL=85.11


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.18it/s]


Epoch 177: Train Loss=4.3768, PPL=81.26 | Val Loss=4.4234, Val PPL=86.25


Training: 100%|██████████| 10/10 [00:05<00:00,  1.94it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 178: Train Loss=4.5248, PPL=93.98 | Val Loss=4.4162, Val PPL=85.62


Training: 100%|██████████| 10/10 [00:05<00:00,  1.94it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 179: Train Loss=4.4224, PPL=84.97 | Val Loss=4.4084, Val PPL=84.71


Training: 100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.18it/s]


Epoch 180: Train Loss=4.4378, PPL=86.70 | Val Loss=4.4126, Val PPL=84.86


Training: 100%|██████████| 10/10 [00:05<00:00,  1.95it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 181: Train Loss=4.4452, PPL=87.25 | Val Loss=4.3854, Val PPL=82.56


Training: 100%|██████████| 10/10 [00:05<00:00,  1.96it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.18it/s]


Epoch 182: Train Loss=4.5271, PPL=96.44 | Val Loss=4.3861, Val PPL=82.63


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.22it/s]


Epoch 183: Train Loss=4.4047, PPL=82.77 | Val Loss=4.3952, Val PPL=83.58


Training: 100%|██████████| 10/10 [00:05<00:00,  1.93it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.20it/s]


Epoch 184: Train Loss=4.4076, PPL=83.46 | Val Loss=4.3949, Val PPL=83.53


Training: 100%|██████████| 10/10 [00:05<00:00,  1.95it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 185: Train Loss=4.5771, PPL=99.11 | Val Loss=4.3942, Val PPL=83.56


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.18it/s]


Epoch 186: Train Loss=4.4478, PPL=86.11 | Val Loss=4.3921, Val PPL=83.42


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 187: Train Loss=4.5043, PPL=92.34 | Val Loss=4.3908, Val PPL=83.38


Training: 100%|██████████| 10/10 [00:04<00:00,  2.04it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 188: Train Loss=4.4862, PPL=89.36 | Val Loss=4.3744, Val PPL=82.12


Training: 100%|██████████| 10/10 [00:05<00:00,  1.94it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 189: Train Loss=4.4353, PPL=86.28 | Val Loss=4.3693, Val PPL=81.37


Training: 100%|██████████| 10/10 [00:05<00:00,  1.93it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.32it/s]


Epoch 190: Train Loss=4.5188, PPL=93.74 | Val Loss=4.3839, Val PPL=82.40


Training: 100%|██████████| 10/10 [00:05<00:00,  1.98it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.20it/s]


Epoch 191: Train Loss=4.3817, PPL=81.47 | Val Loss=4.3909, Val PPL=82.91


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.18it/s]


Epoch 192: Train Loss=4.4873, PPL=90.87 | Val Loss=4.3984, Val PPL=83.35


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.20it/s]


Epoch 193: Train Loss=4.4995, PPL=91.71 | Val Loss=4.3819, Val PPL=82.02


Training: 100%|██████████| 10/10 [00:05<00:00,  1.95it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.23it/s]


Epoch 194: Train Loss=4.4411, PPL=87.86 | Val Loss=4.3660, Val PPL=81.09


Training: 100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 195: Train Loss=4.3155, PPL=77.88 | Val Loss=4.3712, Val PPL=81.93


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.18it/s]


Epoch 196: Train Loss=4.4303, PPL=85.96 | Val Loss=4.3836, Val PPL=82.65


Training: 100%|██████████| 10/10 [00:05<00:00,  1.84it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 197: Train Loss=4.3843, PPL=82.04 | Val Loss=4.3896, Val PPL=83.09


Training: 100%|██████████| 10/10 [00:05<00:00,  1.94it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 198: Train Loss=4.4115, PPL=85.38 | Val Loss=4.3850, Val PPL=82.77


Training: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 199: Train Loss=4.4025, PPL=83.92 | Val Loss=4.3790, Val PPL=82.13


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]

Epoch 200: Train Loss=4.4217, PPL=84.78 | Val Loss=4.3736, Val PPL=81.86





# Test Parameters

In [7]:
BATCHES_PER_TEST = 1
GREEDY_DECODE = True
TEST_MAX_LEN = 256
TEST_TOP_P = 0.9
TEST_TEMPERATURE = 0.9

# Test

In [8]:
slice_test_loader = islice(test_loader, BATCHES_PER_TEST)
test_stats = evaluate(model, slice_test_loader, device, pad_id, num_batches=BATCHES_PER_TEST)
print(f"Test Loss={test_stats['val_loss']:.4f}, Test PPL={test_stats['val_ppl']:.2f}")

Evaluating: 100%|██████████| 1/1 [00:00<00:00,  1.91it/s]

Test Loss=4.2768, Test PPL=72.01





# Test Report Generation

In [9]:
with torch.no_grad():
    for pixel_values, ids_loader, paths, raw_labels in train_loader:
        pixel_values = pixel_values.to(device)
        gen_ids = model.generate(
            pixel_values=pixel_values,
            bos_id=bos_id,
            eos_id=eos_id,
            max_new_tokens=TEST_MAX_LEN,
            beam_size=3,                # Set your desired beam size
            temperature=TEST_TEMPERATURE
        )

        info = model.generate_with_logging(
            pixel_values=pixel_values,          # [B, C, H, W]
            bos_id=tokenizer.bos_token_id,
            eos_id=tokenizer.eos_token_id,
            tokenizer=tokenizer,
            preset="safe_sample",
            stop_sequences=["\n\n", "Impression:"],
            max_new_tokens=256,
        )

        print("batch sequences shape:", info["sequences"].shape)
        for i, s in enumerate(info["per_sample"]):
            print(f"[sample {i}] hit_eos={s['stopping']['hit_eos']} repetition={s['repetition']}")
            if "generated" in s["text"]:
                print(s["text"]["generated"])
                print("[Target text]", raw_labels[i])

        eval_results = evaluate_all_metrics(raw_labels, [s["text"]["generated"] for s in info["per_sample"]], evaluation_mode="CheXagent")
        for metric, scores in eval_results.items():
            print(f"{metric}: {scores}")


        print("Predictions (first batch):")
        for i in range(gen_ids.size(0)):
            text_gen = tokenizer.decode(gen_ids[i].tolist())
            text_tgt = tokenizer.decode(ids_loader[i].tolist())
            print(f"\nGEN {i+1}:", text_gen)
            print(f"TGT {i+1}:", text_tgt)
            results = evaluate_all_metrics([text_tgt], [text_gen], evaluation_mode="CheXagent")
            for metric, scores in results.items():
                print(f"{metric}: {scores}")
        del pixel_values, ids_loader, paths, raw_labels, gen_ids
        torch.cuda.empty_cache()
        break

batch sequences shape: torch.Size([8, 73])
[sample 0] hit_eos=True repetition={'max_token_run': 1, 'max_repeat_trigram': 1, 'max_repeat_4gram': 1}
lines and tubes are unchanged. low lung volumes with mild pulmonary edema. retrocardiac opacity likely atelectasis or consolidation.
[Target text] stable position of et tube feeding tube ng tube and left sided subclavian central venous catheter whose tip is at the left brachiocephalicsuperior vena cava junction and rightsided picc line whose tip is not clearly visualized. persistent and stable appearance of bibasilar patchy air space opacities with associated bilateral pleural effusions. persistent and stable appearance of mild pulmonary edema is again noted.
[sample 1] hit_eos=True repetition={'max_token_run': 1, 'max_repeat_trigram': 1, 'max_repeat_4gram': 1}
interval placement of the endotracheal tube with tip approximately 1 cm above the carina. otherwise other lines and tubes are unchanged. low lung volumes. there is no evidence of pneu