In [14]:
from GPT import *


In [15]:
# ---- Runtime device ----
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ---- Data frame of usable rows ----
valid_df = build_valid_df(CSV_PATH, IMG_ROOT)
if valid_df.empty:
    # Fail fast: with zero rows there is nothing to tokenize, split or train on,
    # and the failure would otherwise surface later with a less helpful message.
    raise ValueError("No valid rows found; check paths and PNG conversion.")

# ---- Tokenizer built from the report text ----
labels_as_str = valid_df[TEXT_COL].astype(str).tolist()
tokenizer = build_tokenizer_from_labels(labels_as_str)
# Fall back to 0 / 1 / 2 when the tokenizer does not expose the attribute.
pad_id = getattr(tokenizer, "pad_token_id", 0)
bos_id = getattr(tokenizer, "bos_token_id", 1)
eos_id = getattr(tokenizer, "eos_token_id", 2)

# ---- Dataset ----
# NOTE(review): an earlier comment here claimed 224 px was used, but the value
# is 1024. Presumably intentional (high-resolution radiographs) — confirm.
IMG_SIZE = 1024
tf = dino_image_transform(img_size=IMG_SIZE)
ds = CheXpertDataset(img_root=IMG_ROOT, csv=valid_df, transform=tf, text_col=TEXT_COL)
collate_fn = CaptionCollate(tokenizer, pad_id)

# ---- DataLoader settings ----
BATCH_SIZE = 8
SPLIT_SEED = 42  # fixed so the train/valid/test membership survives kernel restarts
is_windows = os.name == "nt"
num_workers = 0 if is_windows else 2
persistent_workers = num_workers > 0
pin_memory = device.type == "cuda"  # pinning host memory is only useful with a GPU

# Loader over the whole dataset (not referenced by the cells visible here).
full_loader = DataLoader(
    ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=pin_memory,
    persistent_workers=persistent_workers,
    collate_fn=collate_fn
)

# ---- 80 / 10 / 10 split ----
n_total = len(ds)
n_train = int(n_total * 0.8)  # end index of the training slice
n_valid = int(n_total * 0.9)  # end index of the validation slice (not its size)
# A dedicated, seeded generator keeps the permutation reproducible without
# touching the global RNG state used elsewhere.
split_rng = torch.Generator().manual_seed(SPLIT_SEED)
indices = torch.randperm(n_total, generator=split_rng).tolist()
train_idx, valid_idx, test_idx = indices[:n_train], indices[n_train:n_valid], indices[n_valid:]
assert len(train_idx) + len(valid_idx) + len(test_idx) == n_total
train_ds = torch.utils.data.Subset(ds, train_idx)
valid_ds = torch.utils.data.Subset(ds, valid_idx)
test_ds = torch.utils.data.Subset(ds, test_idx)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn, num_workers=num_workers)
valid_loader = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn, num_workers=num_workers)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn, num_workers=num_workers)

# ---- Model ----
# DINO ViT-S/16 hidden size is 384 (for this checkpoint); adjust if you change encoder
D_IMG = 384
# A single visual prefix token is used; the per-patch alternative
# `(IMG_SIZE // 16) ** 2` is intentionally disabled.
N_PREFIX = 1
model = DinoGPTCaptioner(
    vocab_size=tokenizer.vocab_size,
    d_img=D_IMG,
    pad_id=pad_id,
    d_model=512,
    n_layer=8,
    n_head=8,
    n_prefix=N_PREFIX,           # number of visual prefix tokens
    max_seq_len=256,
    dino_model_id="facebook/dinov3-vits16-pretrain-lvd1689m",
    freeze_dino=True,
).to(device)

# Only parameters that still require gradients are handed to the optimizer.
optimizer = torch.optim.AdamW(
    filter(lambda p: p.requires_grad, model.parameters()), lr=3e-4, weight_decay=1e-2
)


Using device: cuda
[INFO] Kept 47494/223462 rows with existing PNGs under C:\Users\emman\Desktop\PROYECTOS_VS_CODE\PRUEBAS_DE_PYTHON\CheXpertPlus\PNG


In [None]:
# ---- Short training run on capped passes (wiring check, not a full fit) ----
N_EPOCHS = 50
BATCHES_PER_EPOCH = 10  # single source of truth for the islice cap and num_batches
for epoch in range(N_EPOCHS):
    # A fresh islice per epoch restarts each loader: train_loader is built with
    # shuffle=True so it yields a new random draw, while valid_loader
    # (shuffle=False) yields the same leading batches every time.
    slice_train_loader = islice(train_loader, BATCHES_PER_EPOCH)
    slice_valid_loader = islice(valid_loader, BATCHES_PER_EPOCH)
    train_stats = train_one_epoch(
        model, slice_train_loader, optimizer, device, pad_id,
        num_batches=BATCHES_PER_EPOCH, grad_clip=1.0,
    )
    val_stats = evaluate(model, slice_valid_loader, device, pad_id, num_batches=BATCHES_PER_EPOCH)
    print(f"Epoch {epoch + 1}: Train Loss={train_stats['loss']:.4f}, PPL={train_stats['ppl']:.2f} | "
            f"Val Loss={val_stats['val_loss']:.4f}, Val PPL={val_stats['val_ppl']:.2f}")

def sequence_ce_loss(logits, labels, pad_id):
    """
    Token-level cross-entropy over a batch of sequences.

    logits: (B, T, V) scores, one vector per sequence position
    labels: (B, T) target token ids; positions equal to pad_id are ignored
    Returns the mean loss over the non-ignored positions, computed with a
    label-smoothing factor of 0.1.
    """
    batch, steps, vocab = logits.shape
    # Collapse batch and time so every position is one classification example.
    flat_logits = logits.reshape(batch * steps, vocab)
    flat_labels = labels.reshape(batch * steps)
    return nn.functional.cross_entropy(
        flat_logits, flat_labels, ignore_index=pad_id, label_smoothing=0.1
    )

@torch.no_grad()
def batch_perplexity(logits, labels, pad_id):
    """
    Perplexity of one batch: exp of the mean negative log-likelihood.

    logits: (..., V) scores; labels: matching target ids, with positions equal
    to pad_id ignored. The likelihood is computed WITHOUT label smoothing:
    smoothing is a training regulariser, and exponentiating a smoothed loss
    does not give a perplexity. The exponent is capped at 20 so an early,
    very high loss cannot overflow the float.

    Returns a Python float.
    """
    vocab = logits.size(-1)
    nll = torch.nn.functional.cross_entropy(
        logits.reshape(-1, vocab), labels.reshape(-1), ignore_index=pad_id
    )
    return float(math.exp(min(nll.item(), 20.0)))

# ---- Held-out loss on the first test batch only ----
slice_test_loader = islice(test_loader, 1)
test_stats = evaluate(model, slice_test_loader, device, pad_id, num_batches=1)
print(f"Test Loss={test_stats['val_loss']:.4f}, Test PPL={test_stats['val_ppl']:.2f}")
# ---- Quick generation sanity check ----
# Switch to eval mode explicitly rather than relying on the mode evaluate()
# happened to leave behind, then restore every submodule's flag so the next
# training cell starts from exactly the state it had before.
training_flags = [(module, module.training) for module in model.modules()]
model.eval()
try:
    with torch.no_grad():
        # test_loader is built with shuffle=False, so this is the same first
        # batch that was scored above; only that batch is decoded.
        for pixel_values, ids_loader, paths, raw_labels in test_loader:
            pixel_values = pixel_values.to(device)
            # NOTE(review): greedy=True is passed together with top_p/temperature;
            # presumably the sampling knobs are ignored then — confirm in generate().
            gen_ids = model.generate(
                pixel_values=pixel_values,
                bos_id=bos_id, eos_id=eos_id,
                max_new_tokens=256, top_p=0.9, temperature=0.9, greedy=True
            )
            print("Predictions (first batch):")
            for i in range(min(gen_ids.size(0), 8)):
                print(f"\nGEN {i+1}:", tokenizer.decode(gen_ids[i].tolist()))
                print(f"TGT {i+1}:", tokenizer.decode(ids_loader[i].tolist()))
            # Drop the batch tensors before asking CUDA to release cached blocks.
            del pixel_values, ids_loader, paths, raw_labels, gen_ids
            torch.cuda.empty_cache()
            break
finally:
    for module, was_training in training_flags:
        module.training = was_training
    del training_flags

Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]


Epoch 1: Train Loss=5.0197, PPL=158.71 | Val Loss=4.9587, Val PPL=145.70


Training: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.03it/s]


Epoch 2: Train Loss=4.8745, PPL=135.51 | Val Loss=4.7260, Val PPL=115.78


Training: 100%|██████████| 10/10 [00:09<00:00,  1.00it/s]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 3: Train Loss=4.5883, PPL=101.31 | Val Loss=4.5383, Val PPL=95.47


Training: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 4: Train Loss=4.4641, PPL=91.01 | Val Loss=4.3915, Val PPL=82.41


Training: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 5: Train Loss=4.4340, PPL=86.10 | Val Loss=4.2703, Val PPL=72.96


Training: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]


Epoch 6: Train Loss=4.1572, PPL=65.70 | Val Loss=4.1886, Val PPL=67.22


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 7: Train Loss=4.1705, PPL=66.34 | Val Loss=4.1035, Val PPL=61.52


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 8: Train Loss=4.2007, PPL=68.38 | Val Loss=4.0377, Val PPL=57.71


Training: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 9: Train Loss=4.0605, PPL=61.98 | Val Loss=3.9586, Val PPL=53.24


Training: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 10: Train Loss=3.8004, PPL=46.32 | Val Loss=3.9243, Val PPL=51.50


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]


Epoch 11: Train Loss=3.7708, PPL=44.20 | Val Loss=3.8613, Val PPL=48.30


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 12: Train Loss=3.8490, PPL=48.37 | Val Loss=3.8220, Val PPL=46.48


Training: 100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 13: Train Loss=3.7910, PPL=45.56 | Val Loss=3.8114, Val PPL=45.95


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 14: Train Loss=3.7840, PPL=45.99 | Val Loss=3.7700, Val PPL=44.06


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 15: Train Loss=3.7892, PPL=47.20 | Val Loss=3.7491, Val PPL=43.17


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 16: Train Loss=3.6160, PPL=37.93 | Val Loss=3.7195, Val PPL=41.90


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 17: Train Loss=3.7947, PPL=45.75 | Val Loss=3.6750, Val PPL=40.10


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 18: Train Loss=3.5980, PPL=37.03 | Val Loss=3.6588, Val PPL=39.41


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 19: Train Loss=3.6742, PPL=40.32 | Val Loss=3.6431, Val PPL=38.76


Training: 100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 20: Train Loss=3.6753, PPL=40.26 | Val Loss=3.6118, Val PPL=37.61


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 21: Train Loss=3.5925, PPL=37.36 | Val Loss=3.5933, Val PPL=36.88


Training: 100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 22: Train Loss=3.5298, PPL=34.28 | Val Loss=3.5603, Val PPL=35.73


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 23: Train Loss=3.5017, PPL=33.89 | Val Loss=3.5606, Val PPL=35.68


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 24: Train Loss=3.6843, PPL=40.77 | Val Loss=3.5518, Val PPL=35.39


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 25: Train Loss=3.5151, PPL=34.32 | Val Loss=3.5297, Val PPL=34.62


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 26: Train Loss=3.5802, PPL=37.59 | Val Loss=3.5075, Val PPL=33.82


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 27: Train Loss=3.4526, PPL=32.96 | Val Loss=3.5029, Val PPL=33.69


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 28: Train Loss=3.5846, PPL=36.80 | Val Loss=3.4850, Val PPL=33.07


Training: 100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]


Epoch 29: Train Loss=3.5247, PPL=36.00 | Val Loss=3.4579, Val PPL=32.22


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 30: Train Loss=3.3998, PPL=31.24 | Val Loss=3.4509, Val PPL=31.95


Training: 100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 31: Train Loss=3.4098, PPL=31.40 | Val Loss=3.4514, Val PPL=31.96


Training: 100%|██████████| 10/10 [00:10<00:00,  1.10s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]


Epoch 32: Train Loss=3.3555, PPL=29.61 | Val Loss=3.4219, Val PPL=31.09


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]


Epoch 33: Train Loss=3.4156, PPL=31.23 | Val Loss=3.4307, Val PPL=31.37


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 34: Train Loss=3.4374, PPL=32.22 | Val Loss=3.4108, Val PPL=30.70


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 35: Train Loss=3.4409, PPL=31.73 | Val Loss=3.4195, Val PPL=30.99


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 36: Train Loss=3.3514, PPL=29.13 | Val Loss=3.4161, Val PPL=30.88


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 37: Train Loss=3.3434, PPL=28.80 | Val Loss=3.3949, Val PPL=30.29


Training: 100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 38: Train Loss=3.3717, PPL=29.67 | Val Loss=3.3876, Val PPL=30.05


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 39: Train Loss=3.3304, PPL=28.65 | Val Loss=3.3752, Val PPL=29.65


Training: 100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]


Epoch 40: Train Loss=3.3279, PPL=28.73 | Val Loss=3.3761, Val PPL=29.67


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 41: Train Loss=3.4836, PPL=33.00 | Val Loss=3.3767, Val PPL=29.75


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 42: Train Loss=3.4699, PPL=33.12 | Val Loss=3.3651, Val PPL=29.33


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 43: Train Loss=3.4131, PPL=30.98 | Val Loss=3.3383, Val PPL=28.61


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 44: Train Loss=3.2679, PPL=27.18 | Val Loss=3.3347, Val PPL=28.47


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 45: Train Loss=3.2436, PPL=26.86 | Val Loss=3.3334, Val PPL=28.46


Training: 100%|██████████| 10/10 [00:11<00:00,  1.11s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 46: Train Loss=3.3838, PPL=30.45 | Val Loss=3.3194, Val PPL=28.04


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]


Epoch 47: Train Loss=3.1660, PPL=23.95 | Val Loss=3.3112, Val PPL=27.85


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 48: Train Loss=3.2534, PPL=26.09 | Val Loss=3.3020, Val PPL=27.61


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]


Epoch 49: Train Loss=3.2541, PPL=26.22 | Val Loss=3.2966, Val PPL=27.47


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 50: Train Loss=3.2785, PPL=27.09 | Val Loss=3.2904, Val PPL=27.27


Evaluating: 100%|██████████| 1/1 [00:01<00:00,  1.00s/it]

Test Loss=3.3161, Test PPL=27.55





In [24]:
# Fresh AdamW at lr=5e-4. Re-creating the optimizer discards the moment
# estimates held by the previous one; the model weights carry over.
optimizer = torch.optim.AdamW(
    (p for p in model.parameters() if p.requires_grad), lr=5e-4, weight_decay=1e-2
)

# ---- Short training run on capped passes (wiring check, not a full fit) ----
N_EPOCHS = 50
BATCHES_PER_EPOCH = 10  # single source of truth for the islice cap and num_batches
for epoch in range(N_EPOCHS):
    # A fresh islice per epoch restarts each loader: train_loader is built with
    # shuffle=True so it yields a new random draw, while valid_loader
    # (shuffle=False) yields the same leading batches every time.
    slice_train_loader = islice(train_loader, BATCHES_PER_EPOCH)
    slice_valid_loader = islice(valid_loader, BATCHES_PER_EPOCH)
    train_stats = train_one_epoch(
        model, slice_train_loader, optimizer, device, pad_id,
        num_batches=BATCHES_PER_EPOCH, grad_clip=1.0,
    )
    val_stats = evaluate(model, slice_valid_loader, device, pad_id, num_batches=BATCHES_PER_EPOCH)
    print(f"Epoch {epoch + 1}: Train Loss={train_stats['loss']:.4f}, PPL={train_stats['ppl']:.2f} | "
            f"Val Loss={val_stats['val_loss']:.4f}, Val PPL={val_stats['val_ppl']:.2f}")

def sequence_ce_loss(logits, labels, pad_id):
    """
    Token-level cross-entropy over a batch of sequences.

    logits: (B, T, V) scores, one vector per sequence position
    labels: (B, T) target token ids; positions equal to pad_id are ignored
    Returns the mean loss over the non-ignored positions, computed with a
    label-smoothing factor of 0.1.
    """
    batch, steps, vocab = logits.shape
    # Collapse batch and time so every position is one classification example.
    flat_logits = logits.reshape(batch * steps, vocab)
    flat_labels = labels.reshape(batch * steps)
    return nn.functional.cross_entropy(
        flat_logits, flat_labels, ignore_index=pad_id, label_smoothing=0.1
    )

@torch.no_grad()
def batch_perplexity(logits, labels, pad_id):
    """
    Perplexity of one batch: exp of the mean negative log-likelihood.

    logits: (..., V) scores; labels: matching target ids, with positions equal
    to pad_id ignored. The likelihood is computed WITHOUT label smoothing:
    smoothing is a training regulariser, and exponentiating a smoothed loss
    does not give a perplexity. The exponent is capped at 20 so an early,
    very high loss cannot overflow the float.

    Returns a Python float.
    """
    vocab = logits.size(-1)
    nll = torch.nn.functional.cross_entropy(
        logits.reshape(-1, vocab), labels.reshape(-1), ignore_index=pad_id
    )
    return float(math.exp(min(nll.item(), 20.0)))

# ---- Held-out loss on the first test batch only ----
slice_test_loader = islice(test_loader, 1)
test_stats = evaluate(model, slice_test_loader, device, pad_id, num_batches=1)
print(f"Test Loss={test_stats['val_loss']:.4f}, Test PPL={test_stats['val_ppl']:.2f}")
# ---- Quick generation sanity check ----
# Switch to eval mode explicitly rather than relying on the mode evaluate()
# happened to leave behind, then restore every submodule's flag so the next
# training cell starts from exactly the state it had before.
training_flags = [(module, module.training) for module in model.modules()]
model.eval()
try:
    with torch.no_grad():
        # test_loader is built with shuffle=False, so this is the same first
        # batch that was scored above; only that batch is decoded.
        for pixel_values, ids_loader, paths, raw_labels in test_loader:
            pixel_values = pixel_values.to(device)
            # NOTE(review): greedy=True is passed together with top_p/temperature;
            # presumably the sampling knobs are ignored then — confirm in generate().
            gen_ids = model.generate(
                pixel_values=pixel_values,
                bos_id=bos_id, eos_id=eos_id,
                max_new_tokens=256, top_p=0.9, temperature=0.9, greedy=True
            )
            print("Predictions (first batch):")
            for i in range(min(gen_ids.size(0), 8)):
                print(f"\nGEN {i+1}:", tokenizer.decode(gen_ids[i].tolist()))
                print(f"TGT {i+1}:", tokenizer.decode(ids_loader[i].tolist()))
            # Drop the batch tensors before asking CUDA to release cached blocks.
            del pixel_values, ids_loader, paths, raw_labels, gen_ids
            torch.cuda.empty_cache()
            break
finally:
    for module, was_training in training_flags:
        module.training = was_training
    del training_flags

Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 1: Train Loss=3.5706, PPL=36.86 | Val Loss=3.4515, Val PPL=32.04


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.02it/s]


Epoch 2: Train Loss=3.4131, PPL=31.49 | Val Loss=3.4323, Val PPL=31.41


Training: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]


Epoch 3: Train Loss=3.2753, PPL=27.04 | Val Loss=3.4008, Val PPL=30.49


Training: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 4: Train Loss=3.3605, PPL=29.83 | Val Loss=3.3971, Val PPL=30.36


Training: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 5: Train Loss=3.2628, PPL=26.89 | Val Loss=3.3726, Val PPL=29.62


Training: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.04it/s]


Epoch 6: Train Loss=3.3077, PPL=27.76 | Val Loss=3.3885, Val PPL=30.07


Training: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]


Epoch 7: Train Loss=3.4266, PPL=31.42 | Val Loss=3.3410, Val PPL=28.71


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 8: Train Loss=3.2446, PPL=26.70 | Val Loss=3.3534, Val PPL=29.10


Training: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 9: Train Loss=3.2936, PPL=27.91 | Val Loss=3.3499, Val PPL=28.94


Training: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 10: Train Loss=3.3675, PPL=29.80 | Val Loss=3.3377, Val PPL=28.62


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 11: Train Loss=3.3471, PPL=29.35 | Val Loss=3.3474, Val PPL=28.92


Training: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]


Epoch 12: Train Loss=3.3975, PPL=30.92 | Val Loss=3.3296, Val PPL=28.38


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]


Epoch 13: Train Loss=3.2519, PPL=26.10 | Val Loss=3.3344, Val PPL=28.45


Training: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]


Epoch 14: Train Loss=3.4334, PPL=32.21 | Val Loss=3.3039, Val PPL=27.61


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 15: Train Loss=3.1466, PPL=23.55 | Val Loss=3.2976, Val PPL=27.44


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 16: Train Loss=3.3756, PPL=29.64 | Val Loss=3.3178, Val PPL=27.98


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 17: Train Loss=3.1805, PPL=24.38 | Val Loss=3.3010, Val PPL=27.50


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 18: Train Loss=3.1777, PPL=24.37 | Val Loss=3.2919, Val PPL=27.22


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 19: Train Loss=3.1367, PPL=23.30 | Val Loss=3.2657, Val PPL=26.57


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 20: Train Loss=3.2188, PPL=25.36 | Val Loss=3.2870, Val PPL=27.14


Training: 100%|██████████| 10/10 [00:11<00:00,  1.10s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 21: Train Loss=3.2573, PPL=26.09 | Val Loss=3.2778, Val PPL=26.85


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 22: Train Loss=3.2962, PPL=27.22 | Val Loss=3.2688, Val PPL=26.62


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 23: Train Loss=3.2664, PPL=26.64 | Val Loss=3.2663, Val PPL=26.57


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 24: Train Loss=3.3100, PPL=28.39 | Val Loss=3.2542, Val PPL=26.19


Training: 100%|██████████| 10/10 [00:10<00:00,  1.10s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 25: Train Loss=3.2482, PPL=26.31 | Val Loss=3.2739, Val PPL=26.79


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.00it/s]


Epoch 26: Train Loss=3.2134, PPL=25.37 | Val Loss=3.2391, Val PPL=25.91


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 27: Train Loss=3.2329, PPL=26.12 | Val Loss=3.2357, Val PPL=25.80


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 28: Train Loss=3.2513, PPL=26.56 | Val Loss=3.2246, Val PPL=25.47


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 29: Train Loss=3.1043, PPL=22.77 | Val Loss=3.2086, Val PPL=25.17


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 30: Train Loss=3.1854, PPL=24.34 | Val Loss=3.2016, Val PPL=24.97


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 31: Train Loss=3.2671, PPL=26.65 | Val Loss=3.2168, Val PPL=25.30


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 32: Train Loss=3.0931, PPL=22.31 | Val Loss=3.2099, Val PPL=25.17


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]


Epoch 33: Train Loss=3.1025, PPL=22.65 | Val Loss=3.2008, Val PPL=24.93


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 34: Train Loss=3.3099, PPL=27.77 | Val Loss=3.2057, Val PPL=25.10


Training: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 35: Train Loss=3.0735, PPL=22.07 | Val Loss=3.2065, Val PPL=25.10


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 36: Train Loss=3.1928, PPL=24.92 | Val Loss=3.2116, Val PPL=25.19


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 37: Train Loss=3.1747, PPL=24.25 | Val Loss=3.1980, Val PPL=24.90


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 38: Train Loss=3.1213, PPL=23.20 | Val Loss=3.1853, Val PPL=24.53


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 39: Train Loss=3.2135, PPL=25.27 | Val Loss=3.1678, Val PPL=24.06


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 40: Train Loss=3.2294, PPL=26.36 | Val Loss=3.1730, Val PPL=24.22


Training: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 41: Train Loss=3.0920, PPL=22.38 | Val Loss=3.1588, Val PPL=23.81


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 42: Train Loss=3.1373, PPL=23.67 | Val Loss=3.1598, Val PPL=23.86


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 43: Train Loss=3.0718, PPL=21.88 | Val Loss=3.1756, Val PPL=24.26


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.00it/s]


Epoch 44: Train Loss=3.1625, PPL=24.50 | Val Loss=3.1707, Val PPL=24.11


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 45: Train Loss=2.9365, PPL=19.00 | Val Loss=3.1788, Val PPL=24.32


Training: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 46: Train Loss=3.2469, PPL=26.20 | Val Loss=3.1651, Val PPL=24.03


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 47: Train Loss=3.1072, PPL=22.71 | Val Loss=3.1570, Val PPL=23.82


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 48: Train Loss=3.2467, PPL=26.63 | Val Loss=3.1581, Val PPL=23.81


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 49: Train Loss=3.0437, PPL=21.32 | Val Loss=3.1379, Val PPL=23.33


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 50: Train Loss=3.0657, PPL=21.93 | Val Loss=3.1443, Val PPL=23.47


Evaluating: 100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


Test Loss=3.0535, Test PPL=21.19
Predictions (first batch):

GEN 1: 1. pa and lateral views of the chest demonstrate stable positioning of a right upper extremity picc line. 2. no evidence of acute cardiopulmonary disease. 3. no significant interval change in cardiomegaly. degenerative changes of the thoracic spine. osteopenia.., no acute osteopenia., no acute osteopenia, with, pleural effusion. ). - 2008 hours demonstrates no acute osteopenia, pulmonary edema., unchanged. - 2008. osteopenia, osteopenia, unchanged. osteopenia, pleural effusion. )., unchanged., unchanged. osteopenia, no acute osteopenia, no significant osteopenia, no acute osteopenia, right - 6 - 6 - 6 - 6 - 2008., md at 10 - 2008 at the thoracic spine. - 6 - 2008. - 2008, no acute osteopenia, no significant osteopenia, no significant interval change in the attending radiographic change.,
TGT 1: 1. chest 2 views, demonstrate no focal consolidation or pleural effusion. stable overall aeration and volume 2. cardiac silhou

In [25]:
# Fresh AdamW at lr=2e-4. Re-creating the optimizer discards the moment
# estimates held by the previous one; the model weights carry over.
optimizer = torch.optim.AdamW(
    (p for p in model.parameters() if p.requires_grad), lr=2e-4, weight_decay=1e-2
)

N_EPOCHS = 50
BATCHES_PER_EPOCH = 10  # single source of truth for the islice cap and num_batches

# The scheduler is stepped once per epoch below, so its horizon is tied to the
# epoch count instead of being a second, independent literal.
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=N_EPOCHS)

# ---- Short training run on capped passes (wiring check, not a full fit) ----
for epoch in range(N_EPOCHS):
    # A fresh islice per epoch restarts each loader: train_loader is built with
    # shuffle=True so it yields a new random draw, while valid_loader
    # (shuffle=False) yields the same leading batches every time.
    slice_train_loader = islice(train_loader, BATCHES_PER_EPOCH)
    slice_valid_loader = islice(valid_loader, BATCHES_PER_EPOCH)
    train_stats = train_one_epoch(
        model, slice_train_loader, optimizer, device, pad_id,
        num_batches=BATCHES_PER_EPOCH, grad_clip=1.0,
    )
    val_stats = evaluate(model, slice_valid_loader, device, pad_id, num_batches=BATCHES_PER_EPOCH)
    lr_scheduler.step()  # advance the cosine schedule after the epoch's optimizer steps
    print(f"Epoch {epoch + 1}: Train Loss={train_stats['loss']:.4f}, PPL={train_stats['ppl']:.2f} | "
            f"Val Loss={val_stats['val_loss']:.4f}, Val PPL={val_stats['val_ppl']:.2f}")

def sequence_ce_loss(logits, labels, pad_id):
    """
    Token-level cross-entropy over a batch of sequences.

    logits: (B, T, V) scores, one vector per sequence position
    labels: (B, T) target token ids; positions equal to pad_id are ignored
    Returns the mean loss over the non-ignored positions, computed with a
    label-smoothing factor of 0.1.
    """
    batch, steps, vocab = logits.shape
    # Collapse batch and time so every position is one classification example.
    flat_logits = logits.reshape(batch * steps, vocab)
    flat_labels = labels.reshape(batch * steps)
    return nn.functional.cross_entropy(
        flat_logits, flat_labels, ignore_index=pad_id, label_smoothing=0.1
    )

@torch.no_grad()
def batch_perplexity(logits, labels, pad_id):
    """
    Perplexity of one batch: exp of the mean negative log-likelihood.

    logits: (..., V) scores; labels: matching target ids, with positions equal
    to pad_id ignored. The likelihood is computed WITHOUT label smoothing:
    smoothing is a training regulariser, and exponentiating a smoothed loss
    does not give a perplexity. The exponent is capped at 20 so an early,
    very high loss cannot overflow the float.

    Returns a Python float.
    """
    vocab = logits.size(-1)
    nll = torch.nn.functional.cross_entropy(
        logits.reshape(-1, vocab), labels.reshape(-1), ignore_index=pad_id
    )
    return float(math.exp(min(nll.item(), 20.0)))

# ---- Held-out check on a single test batch ----
slice_test_loader = islice(test_loader, 1)
test_stats = evaluate(model, slice_test_loader, device, pad_id, num_batches=1)
print(f"Test Loss={test_stats['val_loss']:.4f}, Test PPL={test_stats['val_ppl']:.2f}")

# ---- Quick generation sanity check ----
# Decode the first test batch only and print each generation next to its target.
with torch.no_grad():
    for pixel_values, ids_loader, paths, raw_labels in islice(test_loader, 1):
        pixel_values = pixel_values.to(device)
        # NOTE(review): greedy=True is passed together with top_p/temperature;
        # whether the sampling knobs still have any effect depends on
        # model.generate — confirm against its implementation.
        gen_ids = model.generate(
            pixel_values=pixel_values,
            bos_id=bos_id,
            eos_id=eos_id,
            max_new_tokens=256,
            top_p=0.9,
            temperature=0.9,
            greedy=True,
        )
        print("Predictions (first batch):")
        # Show at most eight samples from the batch.
        for i in range(min(gen_ids.size(0), 8)):
            print(f"\nGEN {i+1}:", tokenizer.decode(gen_ids[i].tolist()))
            print(f"TGT {i+1}:", tokenizer.decode(ids_loader[i].tolist()))
        # Drop the batch references before asking the CUDA allocator to release cached memory.
        del pixel_values, ids_loader, paths, raw_labels, gen_ids
        torch.cuda.empty_cache()

Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]


Epoch 1: Train Loss=3.0362, PPL=20.97 | Val Loss=3.1141, Val PPL=22.79


Training: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 2: Train Loss=3.1639, PPL=23.78 | Val Loss=3.0976, Val PPL=22.41


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 3: Train Loss=3.0940, PPL=22.47 | Val Loss=3.0810, Val PPL=22.05


Training: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 4: Train Loss=3.0455, PPL=21.23 | Val Loss=3.0688, Val PPL=21.79


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.00it/s]


Epoch 5: Train Loss=3.0456, PPL=21.43 | Val Loss=3.0564, Val PPL=21.53


Training: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.02it/s]


Epoch 6: Train Loss=3.0149, PPL=21.36 | Val Loss=3.0522, Val PPL=21.43


Training: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.00s/it]


Epoch 7: Train Loss=3.1733, PPL=24.66 | Val Loss=3.0552, Val PPL=21.48


Training: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 8: Train Loss=3.0237, PPL=20.89 | Val Loss=3.0504, Val PPL=21.40


Training: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.03it/s]


Epoch 9: Train Loss=2.9483, PPL=19.66 | Val Loss=3.0444, Val PPL=21.27


Training: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 10: Train Loss=3.0184, PPL=20.72 | Val Loss=3.0405, Val PPL=21.17


Training: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 11: Train Loss=2.8831, PPL=18.24 | Val Loss=3.0411, Val PPL=21.20


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]


Epoch 12: Train Loss=3.0024, PPL=20.79 | Val Loss=3.0372, Val PPL=21.12


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 13: Train Loss=2.9548, PPL=19.55 | Val Loss=3.0316, Val PPL=20.99


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 14: Train Loss=2.9375, PPL=20.00 | Val Loss=3.0271, Val PPL=20.90


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 15: Train Loss=3.0839, PPL=22.65 | Val Loss=3.0205, Val PPL=20.76


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 16: Train Loss=2.9619, PPL=19.55 | Val Loss=3.0143, Val PPL=20.64


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 17: Train Loss=2.9685, PPL=20.26 | Val Loss=3.0129, Val PPL=20.61


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 18: Train Loss=3.1715, PPL=24.35 | Val Loss=3.0082, Val PPL=20.51


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]


Epoch 19: Train Loss=2.9256, PPL=19.04 | Val Loss=3.0058, Val PPL=20.47


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 20: Train Loss=2.9812, PPL=19.91 | Val Loss=3.0028, Val PPL=20.41


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]


Epoch 21: Train Loss=2.9981, PPL=20.32 | Val Loss=2.9954, Val PPL=20.24


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 22: Train Loss=2.9400, PPL=19.25 | Val Loss=2.9907, Val PPL=20.16


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 23: Train Loss=2.8823, PPL=17.93 | Val Loss=2.9865, Val PPL=20.08


Training: 100%|██████████| 10/10 [00:11<00:00,  1.10s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 24: Train Loss=3.1005, PPL=23.12 | Val Loss=2.9815, Val PPL=19.96


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 25: Train Loss=2.9005, PPL=18.40 | Val Loss=2.9795, Val PPL=19.92


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 26: Train Loss=2.9261, PPL=18.95 | Val Loss=2.9773, Val PPL=19.88


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 27: Train Loss=2.9423, PPL=19.23 | Val Loss=2.9698, Val PPL=19.73


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 28: Train Loss=3.0021, PPL=20.52 | Val Loss=2.9622, Val PPL=19.59


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 29: Train Loss=3.0216, PPL=20.91 | Val Loss=2.9613, Val PPL=19.57


Training: 100%|██████████| 10/10 [00:11<00:00,  1.10s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 30: Train Loss=2.9367, PPL=19.13 | Val Loss=2.9589, Val PPL=19.52


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 31: Train Loss=2.9094, PPL=18.57 | Val Loss=2.9556, Val PPL=19.46


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 32: Train Loss=2.9093, PPL=18.75 | Val Loss=2.9538, Val PPL=19.43


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 33: Train Loss=2.9677, PPL=19.66 | Val Loss=2.9519, Val PPL=19.38


Training: 100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 34: Train Loss=2.9894, PPL=20.20 | Val Loss=2.9499, Val PPL=19.34


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 35: Train Loss=2.8552, PPL=17.66 | Val Loss=2.9478, Val PPL=19.30


Training: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 36: Train Loss=2.8450, PPL=17.48 | Val Loss=2.9462, Val PPL=19.27


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 37: Train Loss=2.8518, PPL=17.52 | Val Loss=2.9439, Val PPL=19.23


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 38: Train Loss=2.9588, PPL=19.97 | Val Loss=2.9425, Val PPL=19.20


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 39: Train Loss=2.8285, PPL=17.15 | Val Loss=2.9404, Val PPL=19.17


Training: 100%|██████████| 10/10 [00:10<00:00,  1.08s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.01s/it]


Epoch 40: Train Loss=2.9636, PPL=19.63 | Val Loss=2.9388, Val PPL=19.13


Training: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.02s/it]


Epoch 41: Train Loss=3.0592, PPL=21.75 | Val Loss=2.9372, Val PPL=19.10


Training: 100%|██████████| 10/10 [00:10<00:00,  1.09s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 42: Train Loss=2.9369, PPL=19.28 | Val Loss=2.9367, Val PPL=19.09


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 43: Train Loss=2.9475, PPL=19.52 | Val Loss=2.9358, Val PPL=19.07


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 44: Train Loss=2.8677, PPL=17.88 | Val Loss=2.9351, Val PPL=19.06


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 45: Train Loss=2.8265, PPL=17.13 | Val Loss=2.9344, Val PPL=19.05


Training: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 46: Train Loss=2.9300, PPL=18.89 | Val Loss=2.9338, Val PPL=19.04


Training: 100%|██████████| 10/10 [00:10<00:00,  1.07s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 47: Train Loss=2.9102, PPL=18.50 | Val Loss=2.9336, Val PPL=19.03


Training: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


Epoch 48: Train Loss=2.7937, PPL=16.46 | Val Loss=2.9334, Val PPL=19.03


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 49: Train Loss=2.8331, PPL=17.39 | Val Loss=2.9334, Val PPL=19.03


Training: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]
Evaluating: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Epoch 50: Train Loss=2.8511, PPL=17.49 | Val Loss=2.9334, Val PPL=19.03


Evaluating: 100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


Test Loss=2.8813, Test PPL=17.84
Predictions (first batch):

GEN 1: 1. pa and lateral views of the chest demonstrate stable appearance of the right upper extremity picc line. 2. the lungs are clear without focal consolidation. 3. no pleural effusion. 4. no acute osseous abnormality. degenerative changes of the thoracic spine. degenerative changes of the spine. degenerative changes of the spine. degenerative changes. degenerative changes of the spine. degenerative changes of the spine. degenerative changes of the spine. spine. spine... degenerative changes.. degenerative changes of the spine. degenerative changes of the spine.. spine degenerative changes of the spine. degenerative changes of the spine.. degenerative changes of the spine. degenerative changes of the spine.. degenerative changes of the spine... spine... of the spine.. spine. spine. degenerative changes of the spine. degenerative changes of the spine., and spine. degenerative changes
TGT 1: 1. chest 2 views, demonstrate no