In [12]:
import pandas as pd
from itertools import islice
import torch
from torch.utils.data import DataLoader
import sys
sys.path.append(r"C:\Users\emman\Desktop\PROYECTOS_VS_CODE\PRUEBAS_DE_PYTHON\Chest-X-ray-Diagnosis-Automated-Reporting-using-CNNs-and-LLMs---UDEM-PEF-Thesis-Fall-2025")

from utils.text_metrics import evaluate_all_metrics
from utils.temp_utils import *
from utils.gpt_models import DinoGPTCaptioner, DinoGPT2Captioner
from utils.chexpert_dataset import CheXpertDataset
from utils.padchest_dataset import PadChestGRDataset

# Data

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

CSV_PATH = r"C:\Users\emman\Desktop\PROYECTOS_VS_CODE\PRUEBAS_DE_PYTHON\CheXpertPlus\df_chexpert_plus_240401.csv"
IMG_ROOT = r"C:\Users\emman\Desktop\PROYECTOS_VS_CODE\PRUEBAS_DE_PYTHON\CheXpertPlus\PNG"
TEXT_COL = "section_impression"
PATH_COL = "path_to_image"

IMG_SIZE = 224
MAX_LEN = 64
NUM_BATCH = 8

tf = dino_image_transform(img_size=IMG_SIZE)

ds_train = CheXpertDataset(img_root=IMG_ROOT, csv_path=CSV_PATH, split="train", transform=tf, text_col=TEXT_COL)
ds_valid = CheXpertDataset(img_root=IMG_ROOT, csv_path=CSV_PATH, split="valid", transform=tf, text_col=TEXT_COL)
ds_test = CheXpertDataset(img_root=IMG_ROOT, csv_path=CSV_PATH, split="test", transform=tf, text_col=TEXT_COL)

tokenizer = build_tokenizer_from_labels(gpt2=True)
pad_id = tokenizer.pad_token_id
eos_id = tokenizer.eos_token_id
bos_id = tokenizer.bos_token_id
collate_fn = CaptionCollate(tokenizer, pad_id)

train_loader = DataLoader(ds_train, batch_size=NUM_BATCH, shuffle=True, collate_fn=collate_fn)
valid_loader = DataLoader(ds_valid, batch_size=NUM_BATCH, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(ds_test, batch_size=NUM_BATCH, shuffle=False, collate_fn=collate_fn)

Using device: cuda
[INFO] Kept 47494/223462 rows with existing PNGs under C:\Users\emman\Desktop\PROYECTOS_VS_CODE\PRUEBAS_DE_PYTHON\CheXpertPlus\PNG
[INFO] Kept 47494/223462 rows with existing PNGs under C:\Users\emman\Desktop\PROYECTOS_VS_CODE\PRUEBAS_DE_PYTHON\CheXpertPlus\PNG
[INFO] Kept 47494/223462 rows with existing PNGs under C:\Users\emman\Desktop\PROYECTOS_VS_CODE\PRUEBAS_DE_PYTHON\CheXpertPlus\PNG


# Model

In [14]:
# DINO ViT-S/16 hidden size is 384 
EMBEDDING_D_IMG = 384
N_PREFIX = (IMG_SIZE // 16) ** 2  # number of visual prefix tokens (including CLS)

def pick_heads(d_model, target_head_dim=64):
    h = max(1, round(d_model / target_head_dim))
    while d_model % h != 0: h -= 1
    return h

D_MODEL = 768
N_HEAD = pick_heads(D_MODEL, 64)  # -> 12


model = DinoGPT2Captioner(
    d_img=EMBEDDING_D_IMG,
    num_prefix_tokens=N_PREFIX,
    gpt2_name="gpt2",
    dino_model_id="facebook/dinov3-vits16-pretrain-lvd1689m",
    freeze_dino=True
).to(device)

# Print model parameters and trainable parameters
total_params = sum(p.numel() for p in model.parameters())
print(f"Total model parameters: {total_params / 1_000_000:.2f} Millions")

trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable model parameters: {trainable_params / 1_000_000:.2f} Millions")

# Print model footprint
model_footprint_in_gb = (total_params * 4) * (1e-9)  # assuming 4 bytes per parameter (float32)
print(f"Approximate model footprint: {model_footprint_in_gb:.2f} GB")

# after model init
#model.decoder.lm_head.weight = model.decoder.tok_emb.weight  # weight tying

Total model parameters: 146.33 Millions
Trainable model parameters: 124.74 Millions
Approximate model footprint: 0.59 GB


# Train Parameters

In [15]:
optimizer = torch.optim.AdamW(
    filter(lambda p: p.requires_grad, model.parameters()), lr=3e-4, weight_decay=1e-2
)
loss = sequence_ce_loss
NUM_EPOCHS = 100
BATCHES_PER_EPOCH = 10

# Training

In [16]:
for epoch in range(NUM_EPOCHS):
    slice_train_loader = islice(train_loader, BATCHES_PER_EPOCH)
    slice_valid_loader = islice(valid_loader, BATCHES_PER_EPOCH)
    train_stats = train_one_epoch(model, slice_train_loader, optimizer, device, pad_id, num_batches=BATCHES_PER_EPOCH, loss_fn=loss, grad_clip=1.0)
    val_stats = evaluate(model, slice_valid_loader, device, pad_id, num_batches=BATCHES_PER_EPOCH, loss_fn=loss)
    print(f"Epoch {epoch + 1}: Train Loss={train_stats['loss']:.4f}, PPL={train_stats['ppl']:.2f} | "
            f"Val Loss={val_stats['val_loss']:.4f}, Val PPL={val_stats['val_ppl']:.2f}")

Training: 100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.27it/s]


Epoch 1: Train Loss=8.1417, PPL=4012.19 | Val Loss=7.1253, Val PPL=1255.83


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.28it/s]


Epoch 2: Train Loss=6.4348, PPL=667.27 | Val Loss=5.6649, Val PPL=294.01


Training: 100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 3: Train Loss=5.4883, PPL=249.33 | Val Loss=5.2155, Val PPL=187.76


Training: 100%|██████████| 10/10 [00:05<00:00,  1.84it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 4: Train Loss=5.1508, PPL=175.15 | Val Loss=4.9477, Val PPL=143.98


Training: 100%|██████████| 10/10 [00:05<00:00,  1.76it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.13it/s]


Epoch 5: Train Loss=5.0149, PPL=153.21 | Val Loss=4.7999, Val PPL=124.73


Training: 100%|██████████| 10/10 [00:05<00:00,  1.77it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 6: Train Loss=4.9790, PPL=149.78 | Val Loss=4.6990, Val PPL=112.43


Training: 100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 7: Train Loss=4.7300, PPL=114.80 | Val Loss=4.6190, Val PPL=104.09


Training: 100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 8: Train Loss=4.7583, PPL=118.74 | Val Loss=4.5744, Val PPL=99.22


Training: 100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 9: Train Loss=4.7879, PPL=122.43 | Val Loss=4.5445, Val PPL=96.63


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 10: Train Loss=4.6355, PPL=104.96 | Val Loss=4.4887, Val PPL=91.38


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 11: Train Loss=4.6407, PPL=106.80 | Val Loss=4.4741, Val PPL=89.95


Training: 100%|██████████| 10/10 [00:05<00:00,  1.77it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 12: Train Loss=4.6160, PPL=102.13 | Val Loss=4.4822, Val PPL=90.97


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 13: Train Loss=4.5598, PPL=96.16 | Val Loss=4.4662, Val PPL=89.24


Training: 100%|██████████| 10/10 [00:05<00:00,  1.80it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.18it/s]


Epoch 14: Train Loss=4.5456, PPL=95.54 | Val Loss=4.4110, Val PPL=84.67


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 15: Train Loss=4.5732, PPL=99.02 | Val Loss=4.4164, Val PPL=84.86


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 16: Train Loss=4.6279, PPL=103.15 | Val Loss=4.4159, Val PPL=84.96


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 17: Train Loss=4.4892, PPL=89.48 | Val Loss=4.3956, Val PPL=83.19


Training: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 18: Train Loss=4.4805, PPL=90.65 | Val Loss=4.3772, Val PPL=81.82


Training: 100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 19: Train Loss=4.4200, PPL=85.50 | Val Loss=4.3752, Val PPL=81.60


Training: 100%|██████████| 10/10 [00:05<00:00,  1.80it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 20: Train Loss=4.5301, PPL=93.63 | Val Loss=4.3382, Val PPL=78.69


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 21: Train Loss=4.4530, PPL=86.60 | Val Loss=4.3506, Val PPL=79.58


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]


Epoch 22: Train Loss=4.2922, PPL=75.18 | Val Loss=4.3608, Val PPL=80.42


Training: 100%|██████████| 10/10 [00:05<00:00,  1.81it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]


Epoch 23: Train Loss=4.4426, PPL=86.13 | Val Loss=4.3394, Val PPL=78.67


Training: 100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.04it/s]


Epoch 24: Train Loss=4.3765, PPL=80.97 | Val Loss=4.3360, Val PPL=78.23


Training: 100%|██████████| 10/10 [00:05<00:00,  1.71it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.09it/s]


Epoch 25: Train Loss=4.3642, PPL=79.69 | Val Loss=4.3179, Val PPL=76.76


Training: 100%|██████████| 10/10 [00:05<00:00,  1.77it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]


Epoch 26: Train Loss=4.3397, PPL=77.73 | Val Loss=4.2975, Val PPL=74.94


Training: 100%|██████████| 10/10 [00:05<00:00,  1.74it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.04it/s]


Epoch 27: Train Loss=4.3711, PPL=81.24 | Val Loss=4.2965, Val PPL=75.06


Training: 100%|██████████| 10/10 [00:05<00:00,  1.72it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]


Epoch 28: Train Loss=4.3446, PPL=79.02 | Val Loss=4.2893, Val PPL=74.38


Training: 100%|██████████| 10/10 [00:05<00:00,  1.72it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.05it/s]


Epoch 29: Train Loss=4.3724, PPL=82.46 | Val Loss=4.2954, Val PPL=74.86


Training: 100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.04it/s]


Epoch 30: Train Loss=4.4135, PPL=84.20 | Val Loss=4.2853, Val PPL=74.11


Training: 100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 31: Train Loss=4.3276, PPL=77.36 | Val Loss=4.2723, Val PPL=73.43


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]


Epoch 32: Train Loss=4.3456, PPL=78.77 | Val Loss=4.2835, Val PPL=74.08


Training: 100%|██████████| 10/10 [00:05<00:00,  1.77it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.08it/s]


Epoch 33: Train Loss=4.3311, PPL=77.17 | Val Loss=4.2505, Val PPL=71.65


Training: 100%|██████████| 10/10 [00:05<00:00,  1.75it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.05it/s]


Epoch 34: Train Loss=4.3216, PPL=76.54 | Val Loss=4.2426, Val PPL=71.08


Training: 100%|██████████| 10/10 [00:05<00:00,  1.77it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.05it/s]


Epoch 35: Train Loss=4.2707, PPL=72.72 | Val Loss=4.2411, Val PPL=70.95


Training: 100%|██████████| 10/10 [00:05<00:00,  1.72it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.03it/s]


Epoch 36: Train Loss=4.3360, PPL=77.57 | Val Loss=4.2317, Val PPL=70.36


Training: 100%|██████████| 10/10 [00:05<00:00,  1.68it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.03it/s]


Epoch 37: Train Loss=4.2865, PPL=74.05 | Val Loss=4.2410, Val PPL=70.98


Training: 100%|██████████| 10/10 [00:05<00:00,  1.69it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.04it/s]


Epoch 38: Train Loss=4.2625, PPL=71.98 | Val Loss=4.2454, Val PPL=71.26


Training: 100%|██████████| 10/10 [00:05<00:00,  1.70it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.03it/s]


Epoch 39: Train Loss=4.3704, PPL=80.41 | Val Loss=4.2253, Val PPL=69.72


Training: 100%|██████████| 10/10 [00:05<00:00,  1.72it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.05it/s]


Epoch 40: Train Loss=4.2761, PPL=73.05 | Val Loss=4.2188, Val PPL=69.33


Training: 100%|██████████| 10/10 [00:05<00:00,  1.76it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.04it/s]


Epoch 41: Train Loss=4.3208, PPL=77.50 | Val Loss=4.2128, Val PPL=68.84


Training: 100%|██████████| 10/10 [00:05<00:00,  1.73it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.06it/s]


Epoch 42: Train Loss=4.2246, PPL=69.03 | Val Loss=4.2063, Val PPL=68.40


Training: 100%|██████████| 10/10 [00:05<00:00,  1.74it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.03it/s]


Epoch 43: Train Loss=4.2057, PPL=67.87 | Val Loss=4.2081, Val PPL=68.55


Training: 100%|██████████| 10/10 [00:05<00:00,  1.74it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.05it/s]


Epoch 44: Train Loss=4.2856, PPL=73.94 | Val Loss=4.2083, Val PPL=68.61


Training: 100%|██████████| 10/10 [00:05<00:00,  1.70it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.04it/s]


Epoch 45: Train Loss=4.3603, PPL=79.29 | Val Loss=4.1963, Val PPL=67.69


Training: 100%|██████████| 10/10 [00:05<00:00,  1.76it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.03it/s]


Epoch 46: Train Loss=4.2191, PPL=69.32 | Val Loss=4.1900, Val PPL=67.18


Training: 100%|██████████| 10/10 [00:05<00:00,  1.72it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.02it/s]


Epoch 47: Train Loss=4.3161, PPL=76.75 | Val Loss=4.1903, Val PPL=67.11


Training: 100%|██████████| 10/10 [00:05<00:00,  1.80it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 48: Train Loss=4.2508, PPL=71.50 | Val Loss=4.1925, Val PPL=67.30


Training: 100%|██████████| 10/10 [00:05<00:00,  1.80it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.25it/s]


Epoch 49: Train Loss=4.1500, PPL=63.89 | Val Loss=4.1892, Val PPL=67.26


Training: 100%|██████████| 10/10 [00:05<00:00,  1.79it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 50: Train Loss=4.1852, PPL=67.01 | Val Loss=4.1825, Val PPL=66.86


Training: 100%|██████████| 10/10 [00:05<00:00,  1.81it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 51: Train Loss=4.1937, PPL=67.09 | Val Loss=4.1690, Val PPL=65.94


Training: 100%|██████████| 10/10 [00:05<00:00,  1.90it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.21it/s]


Epoch 52: Train Loss=4.2046, PPL=67.87 | Val Loss=4.1721, Val PPL=66.12


Training: 100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 53: Train Loss=4.1790, PPL=65.90 | Val Loss=4.1582, Val PPL=65.20


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.21it/s]


Epoch 54: Train Loss=4.1632, PPL=66.20 | Val Loss=4.1601, Val PPL=65.38


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 55: Train Loss=4.1437, PPL=64.06 | Val Loss=4.1670, Val PPL=65.92


Training: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.22it/s]


Epoch 56: Train Loss=4.2629, PPL=72.16 | Val Loss=4.1599, Val PPL=65.26


Training: 100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 57: Train Loss=4.1929, PPL=66.77 | Val Loss=4.1466, Val PPL=64.35


Training: 100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.24it/s]


Epoch 58: Train Loss=4.2414, PPL=70.50 | Val Loss=4.1481, Val PPL=64.55


Training: 100%|██████████| 10/10 [00:05<00:00,  1.96it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.25it/s]


Epoch 59: Train Loss=4.2153, PPL=70.00 | Val Loss=4.1521, Val PPL=64.89


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.23it/s]


Epoch 60: Train Loss=4.2120, PPL=68.22 | Val Loss=4.1526, Val PPL=64.97


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.20it/s]


Epoch 61: Train Loss=4.2454, PPL=71.87 | Val Loss=4.1504, Val PPL=64.71


Training: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 62: Train Loss=4.2183, PPL=69.22 | Val Loss=4.1384, Val PPL=63.79


Training: 100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.21it/s]


Epoch 63: Train Loss=4.1625, PPL=64.75 | Val Loss=4.1452, Val PPL=64.26


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 64: Train Loss=4.1392, PPL=63.23 | Val Loss=4.1351, Val PPL=63.83


Training: 100%|██████████| 10/10 [00:05<00:00,  1.84it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 65: Train Loss=4.2149, PPL=68.99 | Val Loss=4.1237, Val PPL=62.97


Training: 100%|██████████| 10/10 [00:05<00:00,  1.84it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 66: Train Loss=4.1603, PPL=65.48 | Val Loss=4.1296, Val PPL=63.25


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 67: Train Loss=4.0956, PPL=61.41 | Val Loss=4.1287, Val PPL=63.19


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 68: Train Loss=4.2580, PPL=71.84 | Val Loss=4.1334, Val PPL=63.42


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 69: Train Loss=4.0989, PPL=61.09 | Val Loss=4.1267, Val PPL=62.94


Training: 100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 70: Train Loss=4.2108, PPL=68.44 | Val Loss=4.1413, Val PPL=63.83


Training: 100%|██████████| 10/10 [00:05<00:00,  1.84it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 71: Train Loss=4.1624, PPL=64.84 | Val Loss=4.1328, Val PPL=63.33


Training: 100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.05it/s]


Epoch 72: Train Loss=4.1807, PPL=67.43 | Val Loss=4.1257, Val PPL=63.05


Training: 100%|██████████| 10/10 [00:05<00:00,  1.73it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.06it/s]


Epoch 73: Train Loss=4.1000, PPL=61.02 | Val Loss=4.1332, Val PPL=63.66


Training: 100%|██████████| 10/10 [00:05<00:00,  1.80it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.03it/s]


Epoch 74: Train Loss=4.0826, PPL=60.15 | Val Loss=4.1235, Val PPL=62.99


Training: 100%|██████████| 10/10 [00:05<00:00,  1.75it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.03it/s]


Epoch 75: Train Loss=4.1792, PPL=66.53 | Val Loss=4.1112, Val PPL=62.21


Training: 100%|██████████| 10/10 [00:05<00:00,  1.72it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.08it/s]


Epoch 76: Train Loss=4.0990, PPL=61.48 | Val Loss=4.1211, Val PPL=62.84


Training: 100%|██████████| 10/10 [00:05<00:00,  1.79it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.05it/s]


Epoch 77: Train Loss=4.2026, PPL=68.49 | Val Loss=4.1007, Val PPL=61.46


Training: 100%|██████████| 10/10 [00:05<00:00,  1.82it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 78: Train Loss=4.2004, PPL=68.33 | Val Loss=4.1007, Val PPL=61.47


Training: 100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 79: Train Loss=4.1626, PPL=64.95 | Val Loss=4.1067, Val PPL=61.84


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.19it/s]


Epoch 80: Train Loss=4.2320, PPL=69.97 | Val Loss=4.0930, Val PPL=60.98


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 81: Train Loss=4.0691, PPL=59.41 | Val Loss=4.1000, Val PPL=61.54


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 82: Train Loss=4.1060, PPL=61.05 | Val Loss=4.0967, Val PPL=61.33


Training: 100%|██████████| 10/10 [00:06<00:00,  1.47it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.10it/s]


Epoch 83: Train Loss=4.1651, PPL=65.49 | Val Loss=4.1056, Val PPL=61.78


Training: 100%|██████████| 10/10 [00:05<00:00,  1.76it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 84: Train Loss=4.1061, PPL=61.52 | Val Loss=4.0909, Val PPL=60.94


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 85: Train Loss=4.1444, PPL=63.75 | Val Loss=4.0926, Val PPL=60.93


Training: 100%|██████████| 10/10 [00:05<00:00,  1.92it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 86: Train Loss=4.1235, PPL=62.72 | Val Loss=4.0956, Val PPL=61.21


Training: 100%|██████████| 10/10 [00:05<00:00,  1.96it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 87: Train Loss=4.1741, PPL=66.93 | Val Loss=4.0974, Val PPL=61.31


Training: 100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.18it/s]


Epoch 88: Train Loss=4.1478, PPL=63.82 | Val Loss=4.0927, Val PPL=60.92


Training: 100%|██████████| 10/10 [00:05<00:00,  1.84it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 89: Train Loss=4.1482, PPL=64.01 | Val Loss=4.0831, Val PPL=60.32


Training: 100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 90: Train Loss=4.1976, PPL=68.31 | Val Loss=4.0882, Val PPL=60.70


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 91: Train Loss=4.1010, PPL=61.16 | Val Loss=4.0930, Val PPL=60.94


Training: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 92: Train Loss=4.0523, PPL=58.31 | Val Loss=4.0864, Val PPL=60.59


Training: 100%|██████████| 10/10 [00:05<00:00,  1.88it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


Epoch 93: Train Loss=4.0874, PPL=60.03 | Val Loss=4.0796, Val PPL=60.15


Training: 100%|██████████| 10/10 [00:05<00:00,  1.84it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 94: Train Loss=4.1884, PPL=66.86 | Val Loss=4.0802, Val PPL=60.30


Training: 100%|██████████| 10/10 [00:05<00:00,  1.87it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.14it/s]


Epoch 95: Train Loss=4.1614, PPL=64.89 | Val Loss=4.0777, Val PPL=60.10


Training: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 96: Train Loss=4.0724, PPL=59.36 | Val Loss=4.0850, Val PPL=60.40


Training: 100%|██████████| 10/10 [00:05<00:00,  1.83it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 97: Train Loss=4.1311, PPL=62.95 | Val Loss=4.0898, Val PPL=60.72


Training: 100%|██████████| 10/10 [00:05<00:00,  1.80it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.16it/s]


Epoch 98: Train Loss=4.1138, PPL=62.03 | Val Loss=4.0760, Val PPL=59.85


Training: 100%|██████████| 10/10 [00:05<00:00,  1.81it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.15it/s]


Epoch 99: Train Loss=4.0858, PPL=60.41 | Val Loss=4.0743, Val PPL=59.69


Training: 100%|██████████| 10/10 [00:05<00:00,  1.78it/s]
Evaluating: 100%|██████████| 10/10 [00:04<00:00,  2.12it/s]

Epoch 100: Train Loss=4.0892, PPL=60.26 | Val Loss=4.0762, Val PPL=59.90





# Test Parameters

In [17]:
BATCHES_PER_TEST = 1
GREEDY_DECODE = True
TEST_MAX_LEN = 256
TEST_TOP_P = 0.9
TEST_TEMPERATURE = 0.9

# Test

In [18]:
slice_test_loader = islice(test_loader, BATCHES_PER_TEST)
test_stats = evaluate(model, slice_test_loader, device, pad_id, num_batches=BATCHES_PER_TEST)
print(f"Test Loss={test_stats['val_loss']:.4f}, Test PPL={test_stats['val_ppl']:.2f}")

Evaluating: 100%|██████████| 1/1 [00:00<00:00,  1.90it/s]

Test Loss=4.0206, Test PPL=55.74





# Test Report Generation

In [19]:
with torch.no_grad():
    for pixel_values, ids_loader, paths, raw_labels in test_loader:
        pixel_values = pixel_values.to(device)
        gen_ids = model.generate(
            pixel_values=pixel_values,
            input_ids=ids_loader.to(device),
            max_new_tokens=64
        ).to(device)

        info = model.generate_with_logging(
            pixel_values=pixel_values,
            input_ids=ids_loader.to(device),
            tokenizer=tokenizer,
            preset="safe_sample",
            stop_sequences=["\n\n", "Impression:"],
            max_new_tokens=128,
        )
        print("out shape:", info["sequences"].shape)
        for i, s in enumerate(info["per_sample"]):
            print(f"[{i}] EOS={s['stopping']['hit_eos']} rep={s['repetition']}")
            print(s["text"].get("generated","")[:200])
            print("[Target text]", raw_labels[i])

        eval_results = evaluate_all_metrics(raw_labels, [s["text"]["generated"] for s in info["per_sample"]], evaluation_mode="CheXagent")
        for metric, scores in eval_results.items():
            print(f"{metric}: {scores}")



        print("Predictions (first batch):")
        for i in range(gen_ids.size(0)):
            text_gen = tokenizer.decode(gen_ids[i].tolist())
            text_tgt = tokenizer.decode(ids_loader[i].tolist())
            print(f"\nGEN {i+1}:", text_gen)
            print(f"TGT {i+1}:", text_tgt)
            try: 
                results = evaluate_all_metrics([text_tgt], [text_gen], evaluation_mode="CheXagent")
                for metric, scores in results.items():
                    print(f"{metric}: {scores}")
            except Exception as e:
                print("Error in evaluation:", e)
        del pixel_values, ids_loader, paths, raw_labels, gen_ids
        torch.cuda.empty_cache()
        break

out shape: torch.Size([8, 240])
[0] EOS=False rep={'max_token_run': 1, 'max_repeat_trigram': 1, 'max_repeat_4gram': 1}
cardiomegaly. stable appearance of right chest tube and left chest tube. stable cardiomegtal tube. no acute osseous abnormalities. stable mild pulmonary edema. stable bibasilar opiac opacification and
[Target text] interval placement of a right internal jugular venous sheath with the distal tip in the proximal superior vena cava. no pneumothorax. stable position of nasogastric tube feeding tube tracheostomy canula left internal jugular central venous catheter and left upper extremity picc. no significant interval change in hyperexpanded lung volumes right basilar opacities small bilateral pleural effusions tenting of the right hemidiaphragm and biapical pleural thickening.
[1] EOS=False rep={'max_token_run': 1, 'max_repeat_trigram': 1, 'max_repeat_4gram': 1}
the left internal jugular catheter is unchanged in position. stable positioning. stable appearance of the trache

In [20]:
text = "1.  STABLE SMALL LEFT INTERNAL JUGULAR OPACITIES WITH PATCHY TUBE AND NASOGASTRIC TUBES, RIGHT LOWER MEDIASTINAL SIDED CATHETER.  NO SIGNIFICANT CHANGE IN THE PREVIOUS STUDYDEMONSTRATE ATELECTASIS O"
text = text.lower()
encoded = tokenizer.encode(text)
words = text.split()
print("Number of words:", len(words), "Number of tokens:", len(encoded), "pad_id:", pad_id, "eos_id:", eos_id, "bos_id:", bos_id)
print("BOS token id:", tokenizer.bos_token_id, "EOS token id:", tokenizer.eos_token_id, "PAD token id:", tokenizer.pad_token_id)
print(encoded)
for token_id in encoded:    
    print(f"Token ID: {token_id}, Token: {tokenizer.decode([token_id])}")

Number of words: 27 Number of tokens: 48 pad_id: 50256 eos_id: 50256 bos_id: 50256
BOS token id: 50256 EOS token id: 50256 PAD token id: 50256
[50256, 16, 13, 220, 8245, 1402, 1364, 5387, 45808, 934, 1034, 330, 871, 351, 8529, 88, 12403, 290, 25221, 519, 459, 1173, 21103, 11, 826, 2793, 16957, 459, 1292, 34384, 3797, 43332, 13, 220, 645, 2383, 1487, 287, 262, 2180, 2050, 26567, 23104, 379, 9509, 17765, 267, 50256]
Token ID: 50256, Token: 
Token ID: 16, Token: 1
Token ID: 13, Token: .
Token ID: 220, Token:  
Token ID: 8245, Token:  stable
Token ID: 1402, Token:  small
Token ID: 1364, Token:  left
Token ID: 5387, Token:  internal
Token ID: 45808, Token:  jug
Token ID: 934, Token: ular
Token ID: 1034, Token:  op
Token ID: 330, Token: ac
Token ID: 871, Token: ities
Token ID: 351, Token:  with
Token ID: 8529, Token:  patch
Token ID: 88, Token: y
Token ID: 12403, Token:  tube
Token ID: 290, Token:  and
Token ID: 25221, Token:  nas
Token ID: 519, Token: og
Token ID: 459, Token: ast
Token ID: 

In [21]:
import re
import string

def clean_text(text: str) -> str:
    # lowercase
    text = text.lower()

    # remove enumerators like "1." or "23." but KEEP decimals like "2.5"
    # (?<!\d) ensures no digit right before; (?!\d) ensures no digit right after the dot
    text = re.sub(r'(?<!\d)\b\d+\.(?!\d)', ' ', text)

    # remove all punctuation EXCEPT "."
    punctuation = string.punctuation.replace('.', '')
    text = text.translate(str.maketrans('', '', punctuation))

    # normalize spaces around periods to " . " → ". "
    text = re.sub(r'\s*\.\s*', '. ', text)

    # collapse multiple spaces and trim
    text = re.sub(r'\s+', ' ', text).strip()

    return text

# Example
text = "1.  STABLE SMALL LEFT INTERNAL JUGULAR OPACITIES... 2.5 cm nodule; item 2. next. 3. Done."
print(clean_text(text))


# Example
text = """
 1.  INTERVAL PLACEMENT OF A RIGHT INTERNAL JUGULAR VENOUS SHEATH 
WITH THE DISTAL TIP IN THE PROXIMAL SUPERIOR VENA CAVA.  NO 
PNEUMOTHORAX.
 
 2.  STABLE POSITION OF NASOGASTRIC TUBE, FEEDING TUBE, TRACHEOSTOMY 
CANULA, LEFT INTERNAL JUGULAR CENTRAL VENOUS CATHETER, AND LEFT UPPER 
EXTREMITY PICC.  
 
 3.  NO SIGNIFICANT INTERVAL CHANGE IN HYPEREXPANDED LUNG VOLUMES, 
RIGHT BASILAR OPACITIES, SMALL BILATERAL PLEURAL EFFUSIONS, TENTING 
OF THE RIGHT HEMIDIAPHRAGM AND BIAPICAL PLEURAL THICKENING. 
 
 """
cleaned_text = clean_text(text)
print(cleaned_text)


stable small left internal jugular opacities. . . 2. 5 cm nodule item next. done.
interval placement of a right internal jugular venous sheath with the distal tip in the proximal superior vena cava. no pneumothorax. stable position of nasogastric tube feeding tube tracheostomy canula left internal jugular central venous catheter and left upper extremity picc. no significant interval change in hyperexpanded lung volumes right basilar opacities small bilateral pleural effusions tenting of the right hemidiaphragm and biapical pleural thickening.


In [22]:
encoded = tokenizer.encode(cleaned_text)
words = cleaned_text.split()
print("Number of words:", len(words), "Number of tokens:", len(encoded), "pad_id:", pad_id, "eos_id:", eos_id, "bos_id:", bos_id)
print("BOS token id:", tokenizer.bos_token_id, "EOS token id:", tokenizer.eos_token_id, "PAD token id:", tokenizer.pad_token_id)
print(encoded)
for token_id in encoded:    
    print(f"Token ID: {token_id}, Token: {tokenizer.decode([token_id])}")

Number of words: 65 Number of tokens: 112 pad_id: 50256 eos_id: 50256 bos_id: 50256
BOS token id: 50256 EOS token id: 50256 PAD token id: 50256
[50256, 3849, 2100, 13127, 286, 257, 826, 5387, 45808, 934, 8710, 516, 673, 776, 351, 262, 1233, 282, 8171, 287, 262, 14793, 4402, 9098, 410, 8107, 269, 4170, 13, 645, 29631, 849, 273, 897, 13, 8245, 2292, 286, 25221, 519, 459, 1173, 12403, 13017, 12403, 491, 4891, 455, 9145, 460, 4712, 1364, 5387, 45808, 934, 4318, 8710, 516, 3797, 43332, 290, 1364, 6727, 8963, 414, 8301, 66, 13, 645, 2383, 16654, 1487, 287, 20606, 21510, 79, 12249, 12317, 15343, 826, 1615, 1794, 1034, 330, 871, 1402, 24537, 3339, 1523, 914, 15880, 11105, 278, 286, 262, 826, 339, 13602, 72, 6570, 22562, 76, 290, 3182, 499, 605, 3339, 1523, 6546, 3101, 13, 50256]
Token ID: 50256, Token: 
Token ID: 3849, Token: inter
Token ID: 2100, Token: val
Token ID: 13127, Token:  placement
Token ID: 286, Token:  of
Token ID: 257, Token:  a
Token ID: 826, Token:  right
Token ID: 5387, Token: