Model creation and loading

In [1]:
import torch
from pathlib import Path
from utils.models.complete_model import create_complete_model, save_complete_model, load_complete_model, save_checkpoint, load_checkpoint


# ---------------------------------------------------------------------------
# Filesystem locations used by this notebook. They are plain strings relative
# to the working directory. MODELS_DIR keeps its trailing slash because the
# file names below are appended to it directly.
# ---------------------------------------------------------------------------
MODELS_DIR = "models/"
SEGMENTER_MODEL_PATH = MODELS_DIR + "dino_unet_decoder_finetuned.pth"
save_path = MODELS_DIR + "complete_model.pth"          # only referenced by the disabled load step below
checkpoint_path = MODELS_DIR + "model_checkpoint.pth"  # not referenced by active code in this cell

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the complete model; the segmenter checkpoint path is handed to the factory.
model = create_complete_model(
    SEGMENTER_MODEL_PATH=SEGMENTER_MODEL_PATH,
    device=device,
)

# Optional (disabled): restore previously saved weights for the complete model.
# if Path(save_path).exists():
#     model = load_complete_model(model, save_path, device=device, strict=True)

Loaded segmenter weights from models/dino_unet_decoder_finetuned.pth


Data loader creation

In [2]:
import os

from utils.data.dataloaders import create_dataloaders

# --- CheXpert Plus: image root and findings CSV -----------------------------
CHEXPERT_DIR = "Datasets/CheXpertPlus"
chexpert_paths = {
    "chexpert_data_path": CHEXPERT_DIR + "/PNG",  # base PNG folder
    "chexpert_data_csv": CHEXPERT_DIR + "/df_chexpert_plus_240401_findings.csv",
}

# --- MIMIC-CXR: split/metadata/record-list files and the image folder -------
MIMIC_DIR = "Datasets/MIMIC"
mimic_paths = {
    "mimic_data_path": MIMIC_DIR,
    "mimic_splits_csv": MIMIC_DIR + "/mimic-cxr-2.0.0-split.csv.gz",
    "mimic_metadata_csv": MIMIC_DIR + "/mimic-cxr-2.0.0-metadata-findings-only.csv",
    "mimic_reports_path": MIMIC_DIR + "/cxr-record-list.csv.gz",  # must contain 'path'
    "mimic_images_dir": MIMIC_DIR + "/matched_images_and_masks_mimic_224/images",
}

# Extra keyword arguments forwarded to create_dataloaders (none are active).
# Disabled tuning options, kept for reference:
#   "num_workers": os.cpu_count() // 2 if os.cpu_count() else 4   # adjust on your VM
#   "persistent_workers": True    # reuses workers between iterations
#   "prefetch_factor": 4          # each worker prefetches batches
#   "pin_memory": True            # if using CUDA
#   "drop_last": False
kwargs = {}

# Arguments shared by the train and validation loaders; only `split` differs.
shared_loader_args = dict(batch_size=4, sampling_ratio=0.7, **kwargs)

train_loader = create_dataloaders(
    chexpert_paths,
    mimic_paths,
    split="train",
    **shared_loader_args,
)

valid_loader = create_dataloaders(
    chexpert_paths,
    mimic_paths,
    split="valid",
    **shared_loader_args,
)

# Smoke test: pull one training batch and report what came back.
# The fourth element of the batch is not used here.
images, findings, image_paths, _ = next(iter(train_loader))

image_shape = getattr(images, "shape", "N/A")
print("Batch image tensor shape:", image_shape)

# For objects without a `.shape` attribute the length is reported instead.
findings_shape = getattr(findings, "shape", len(findings))
print("Batch findings shape:", findings_shape)

image_paths_shape = getattr(image_paths, "shape", len(image_paths))
print("Batch image paths shape:", image_paths_shape)

Filtering rows with missing PNGs...
[INFO] Kept 18870/49080 rows with existing PNGs
Filtering rows with missing PNGs...
[INFO] Kept 187/496 rows with existing PNGs
Batch image tensor shape: torch.Size([4, 3, 512, 512])
Batch findings shape: 4
Batch image paths shape: 4


Training

In [3]:
import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from transformers import get_cosine_schedule_with_warmup

from utils.training import train, EarlyStopping, EarlyStoppingConfig

# --- Run configuration -------------------------------------------------------
LEARNING_RATE = 1e-5
WEIGHT_DECAY = 0.1
PASSES_OVER_TRAIN_SET = 5   # the step budget is this many times len(train_loader)
# Divisor that converts the step budget into the epoch count handed to train().
# NOTE(review): presumably train() caps each epoch at about this many batches
# (recorded runs show ~101 batches per epoch) - TODO confirm against train().
STEPS_PER_EPOCH = 100
WARMUP_FRACTION = 0.05      # share of the step budget spent on LR warmup

LOG_DIR = "runs/chestx_exp2_chexpert"
PERIODIC_CKPT_PATH = "checkpoints/model_epoch_chexpert.pth"
# One path serves two roles: early stopping writes the best weights here and
# train() resumes from it.
BEST_CKPT_PATH = "checkpoints/model_best_chexpert.pth"

device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): torch.optim.Adam applies weight_decay as an L2 penalty added to
# the gradient. If decoupled weight decay was intended, AdamW is the optimizer
# for that - confirm before switching, since it changes training behaviour.
optimizer = Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
# scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=5, min_lr=1e-6)

# Cosine schedule with linear warmup, sized from the total step budget.
total_steps = PASSES_OVER_TRAIN_SET * len(train_loader)
epochs = total_steps // STEPS_PER_EPOCH
warmup_steps = max(1, int(WARMUP_FRACTION * total_steps))  # at least one warmup step
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps,
)

early = EarlyStopping(
    EarlyStoppingConfig(
        patience=15,
        min_delta=1e-4,
        mode="min",
        restore_best=True,
        best_ckpt_path=BEST_CKPT_PATH,
    )
)

# Report allocated GPU memory and release cached blocks before training starts.
if device == "cuda":
    print(f"Current memory before training: {torch.cuda.memory_allocated(device) / 1e9:.2f} GB")
    torch.cuda.empty_cache()

# NOTE(review): what happens when BEST_CKPT_PATH does not exist yet (first run)
# depends on train() - confirm.
train(
    model=model,
    train_loader=train_loader,
    valid_loader=valid_loader,
    optimizer=optimizer,
    epochs=epochs,                       # total target; not "remaining"
    device=device,
    log_dir=LOG_DIR,                     # SAME dir to keep appending
    checkpoint_path=PERIODIC_CKPT_PATH,
    validate_every=1,
    ckpt_every=2,
    scheduler=scheduler,
    scheduler_step_on="step",
    early_stopping=early,
    resume_from=BEST_CKPT_PATH,          # start from the best weights saved so far
    # start_epoch=...,                 # optional override
    # start_global_step=...,           # optional override
)


Current memory before training: 0.85 GB
🔁 Resuming from checkpoint: checkpoints/model_best_chexpert.pth
Loaded checkpoint from checkpoints/model_best_chexpert.pth (epoch=9, global_step=909, tokens_cum=147888, best_metric=1.8471332144737245)


Epoch 10 Training:   0%|          | 0/4718 [00:00<?, ?batch/s]`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.
Epoch 10 Training:   2%|‚ñè         | 101/4718 [01:03<48:41,  1.58batch/s] 


Epoch 10 | Loss: 4.0677 | LR: 0.0000 | Tokens: 164253


Epoch 10 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:08<00:00,  5.75batch/s]


Epoch 10 | Validation Loss: 1.7996 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 11 Training:   2%|‚ñè         | 101/4718 [00:53<40:36,  1.90batch/s] 


Epoch 11 | Loss: 3.9785 | LR: 0.0000 | Tokens: 181164


Epoch 11 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.27batch/s]


Epoch 11 | Validation Loss: 1.7671 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth


Epoch 12 Training:   2%|‚ñè         | 101/4718 [00:40<31:00,  2.48batch/s]


Epoch 12 | Loss: 3.9999 | LR: 0.0000 | Tokens: 198930


Epoch 12 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.32batch/s]


Epoch 12 | Validation Loss: 1.7292 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 13 Training:   2%|‚ñè         | 101/4718 [00:40<30:52,  2.49batch/s]


Epoch 13 | Loss: 3.8194 | LR: 0.0000 | Tokens: 216097


Epoch 13 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.43batch/s]


Epoch 13 | Validation Loss: 1.6987 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth


Epoch 14 Training:   2%|‚ñè         | 101/4718 [00:39<30:16,  2.54batch/s]


Epoch 14 | Loss: 3.7267 | LR: 0.0000 | Tokens: 232543


Epoch 14 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.33batch/s]


Epoch 14 | Validation Loss: 1.6434 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 15 Training:   2%|‚ñè         | 101/4718 [00:43<32:53,  2.34batch/s] 


Epoch 15 | Loss: 3.6926 | LR: 0.0000 | Tokens: 250183


Epoch 15 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.25batch/s]


Epoch 15 | Validation Loss: 1.6160 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth


Epoch 16 Training:   2%|‚ñè         | 101/4718 [00:39<30:23,  2.53batch/s]


Epoch 16 | Loss: 3.4655 | LR: 0.0000 | Tokens: 266536


Epoch 16 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.48batch/s]


Epoch 16 | Validation Loss: 1.5361 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 17 Training:   2%|‚ñè         | 101/4718 [00:47<35:59,  2.14batch/s] 


Epoch 17 | Loss: 3.4265 | LR: 0.0000 | Tokens: 282833


Epoch 17 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.16batch/s]


Epoch 17 | Validation Loss: 1.4987 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth


Epoch 18 Training:   2%|‚ñè         | 101/4718 [00:40<30:29,  2.52batch/s]


Epoch 18 | Loss: 3.1199 | LR: 0.0000 | Tokens: 298829


Epoch 18 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.26batch/s]


Epoch 18 | Validation Loss: 1.4586 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 19 Training:   2%|‚ñè         | 101/4718 [00:40<30:57,  2.49batch/s]


Epoch 19 | Loss: 3.2007 | LR: 0.0000 | Tokens: 315958


Epoch 19 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.39batch/s]


Epoch 19 | Validation Loss: 1.4355 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth


Epoch 20 Training:   2%|‚ñè         | 101/4718 [00:40<30:55,  2.49batch/s]


Epoch 20 | Loss: 3.2034 | LR: 0.0000 | Tokens: 332987


Epoch 20 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.52batch/s]


Epoch 20 | Validation Loss: 1.4142 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 21 Training:   2%|‚ñè         | 101/4718 [00:40<31:08,  2.47batch/s]


Epoch 21 | Loss: 3.2109 | LR: 0.0000 | Tokens: 349814


Epoch 21 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.02batch/s]


Epoch 21 | Validation Loss: 1.4073 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth


Epoch 22 Training:   2%|‚ñè         | 101/4718 [00:40<30:41,  2.51batch/s]


Epoch 22 | Loss: 3.1554 | LR: 0.0000 | Tokens: 366327


Epoch 22 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.34batch/s]


Epoch 22 | Validation Loss: 1.3681 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 23 Training:   2%|‚ñè         | 101/4718 [00:45<34:43,  2.22batch/s] 


Epoch 23 | Loss: 3.0902 | LR: 0.0000 | Tokens: 384268


Epoch 23 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.47batch/s]


Epoch 23 | Validation Loss: 1.4005 | Validation Tokens: 7481


Epoch 24 Training:   2%|‚ñè         | 101/4718 [00:39<30:28,  2.53batch/s]


Epoch 24 | Loss: 3.0611 | LR: 0.0000 | Tokens: 400834


Epoch 24 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.36batch/s]


Epoch 24 | Validation Loss: 1.3822 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 25 Training:   2%|‚ñè         | 101/4718 [00:40<30:34,  2.52batch/s]


Epoch 25 | Loss: 3.0822 | LR: 0.0000 | Tokens: 417433


Epoch 25 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.40batch/s]


Epoch 25 | Validation Loss: 1.3745 | Validation Tokens: 7481


Epoch 26 Training:   2%|‚ñè         | 101/4718 [00:39<30:24,  2.53batch/s]


Epoch 26 | Loss: 3.0682 | LR: 0.0000 | Tokens: 434439


Epoch 26 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.45batch/s]


Epoch 26 | Validation Loss: 1.3595 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 27 Training:   2%|‚ñè         | 101/4718 [00:39<30:22,  2.53batch/s]


Epoch 27 | Loss: 3.0776 | LR: 0.0000 | Tokens: 451103


Epoch 27 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.15batch/s]


Epoch 27 | Validation Loss: 1.3375 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth


Epoch 28 Training:   2%|‚ñè         | 101/4718 [00:40<30:28,  2.52batch/s]


Epoch 28 | Loss: 3.0676 | LR: 0.0000 | Tokens: 467546


Epoch 28 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.55batch/s]


Epoch 28 | Validation Loss: 1.3302 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 29 Training:   2%|‚ñè         | 101/4718 [00:40<30:36,  2.51batch/s]


Epoch 29 | Loss: 2.9909 | LR: 0.0000 | Tokens: 483822


Epoch 29 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.33batch/s]


Epoch 29 | Validation Loss: 1.3442 | Validation Tokens: 7481


Epoch 30 Training:   2%|‚ñè         | 101/4718 [00:39<30:28,  2.53batch/s]


Epoch 30 | Loss: 3.0181 | LR: 0.0000 | Tokens: 500585


Epoch 30 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.50batch/s]


Epoch 30 | Validation Loss: 1.3371 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 31 Training:   2%|‚ñè         | 101/4718 [00:39<30:16,  2.54batch/s]


Epoch 31 | Loss: 2.9468 | LR: 0.0000 | Tokens: 516910


Epoch 31 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.47batch/s]


Epoch 31 | Validation Loss: 1.3293 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth


Epoch 32 Training:   2%|‚ñè         | 101/4718 [00:40<30:46,  2.50batch/s]


Epoch 32 | Loss: 3.1663 | LR: 0.0000 | Tokens: 533668


Epoch 32 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.46batch/s]


Epoch 32 | Validation Loss: 1.3235 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 33 Training:   2%|‚ñè         | 101/4718 [00:40<30:48,  2.50batch/s]


Epoch 33 | Loss: 3.0429 | LR: 0.0000 | Tokens: 549994


Epoch 33 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.38batch/s]


Epoch 33 | Validation Loss: 1.3282 | Validation Tokens: 7481


Epoch 34 Training:   2%|‚ñè         | 101/4718 [00:40<30:34,  2.52batch/s]


Epoch 34 | Loss: 3.0721 | LR: 0.0000 | Tokens: 566870


Epoch 34 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.58batch/s]


Epoch 34 | Validation Loss: 1.3168 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 35 Training:   2%|‚ñè         | 101/4718 [00:39<30:06,  2.56batch/s]


Epoch 35 | Loss: 2.8988 | LR: 0.0000 | Tokens: 582106


Epoch 35 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.45batch/s]


Epoch 35 | Validation Loss: 1.3226 | Validation Tokens: 7481


Epoch 36 Training:   2%|‚ñè         | 101/4718 [00:39<30:07,  2.55batch/s]


Epoch 36 | Loss: 3.0023 | LR: 0.0000 | Tokens: 598358


Epoch 36 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.54batch/s]


Epoch 36 | Validation Loss: 1.3253 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 37 Training:   2%|‚ñè         | 101/4718 [00:40<30:34,  2.52batch/s]


Epoch 37 | Loss: 3.0220 | LR: 0.0000 | Tokens: 614675


Epoch 37 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.39batch/s]


Epoch 37 | Validation Loss: 1.3116 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth


Epoch 38 Training:   2%|‚ñè         | 101/4718 [00:40<30:57,  2.49batch/s]


Epoch 38 | Loss: 2.9511 | LR: 0.0000 | Tokens: 632582


Epoch 38 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.35batch/s]


Epoch 38 | Validation Loss: 1.3016 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 39 Training:   2%|‚ñè         | 101/4718 [00:39<30:23,  2.53batch/s]


Epoch 39 | Loss: 3.0260 | LR: 0.0000 | Tokens: 648873


Epoch 39 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.15batch/s]


Epoch 39 | Validation Loss: 1.2932 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth


Epoch 40 Training:   2%|‚ñè         | 101/4718 [00:40<30:38,  2.51batch/s]


Epoch 40 | Loss: 2.9787 | LR: 0.0000 | Tokens: 665867


Epoch 40 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.36batch/s]


Epoch 40 | Validation Loss: 1.3285 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 41 Training:   2%|‚ñè         | 101/4718 [00:40<30:56,  2.49batch/s]


Epoch 41 | Loss: 3.0691 | LR: 0.0000 | Tokens: 682730


Epoch 41 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.48batch/s]


Epoch 41 | Validation Loss: 1.2973 | Validation Tokens: 7481


Epoch 42 Training:   2%|‚ñè         | 101/4718 [00:40<30:31,  2.52batch/s]


Epoch 42 | Loss: 3.0102 | LR: 0.0000 | Tokens: 699472


Epoch 42 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.50batch/s]


Epoch 42 | Validation Loss: 1.3022 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 43 Training:   2%|‚ñè         | 101/4718 [00:40<30:36,  2.51batch/s]


Epoch 43 | Loss: 2.8742 | LR: 0.0000 | Tokens: 715953


Epoch 43 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.39batch/s]


Epoch 43 | Validation Loss: 1.3014 | Validation Tokens: 7481


Epoch 44 Training:   2%|‚ñè         | 101/4718 [00:39<30:28,  2.53batch/s]


Epoch 44 | Loss: 2.9400 | LR: 0.0000 | Tokens: 732492


Epoch 44 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.32batch/s]


Epoch 44 | Validation Loss: 1.2952 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 45 Training:   2%|‚ñè         | 101/4718 [00:39<30:21,  2.54batch/s]


Epoch 45 | Loss: 2.9277 | LR: 0.0000 | Tokens: 749538


Epoch 45 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.51batch/s]


Epoch 45 | Validation Loss: 1.2987 | Validation Tokens: 7481


Epoch 46 Training:   2%|‚ñè         | 101/4718 [00:39<30:24,  2.53batch/s]


Epoch 46 | Loss: 3.0032 | LR: 0.0000 | Tokens: 766095


Epoch 46 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.34batch/s]


Epoch 46 | Validation Loss: 1.2844 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 47 Training:   2%|‚ñè         | 101/4718 [00:40<30:43,  2.50batch/s]


Epoch 47 | Loss: 2.9678 | LR: 0.0000 | Tokens: 783123


Epoch 47 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.56batch/s]


Epoch 47 | Validation Loss: 1.2847 | Validation Tokens: 7481


Epoch 48 Training:   2%|‚ñè         | 101/4718 [00:40<30:59,  2.48batch/s]


Epoch 48 | Loss: 3.1085 | LR: 0.0000 | Tokens: 800986


Epoch 48 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.46batch/s]


Epoch 48 | Validation Loss: 1.2828 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 49 Training:   2%|‚ñè         | 101/4718 [00:39<30:17,  2.54batch/s]


Epoch 49 | Loss: 2.9671 | LR: 0.0000 | Tokens: 817262


Epoch 49 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.43batch/s]


Epoch 49 | Validation Loss: 1.2846 | Validation Tokens: 7481


Epoch 50 Training:   2%|‚ñè         | 101/4718 [00:39<30:09,  2.55batch/s]


Epoch 50 | Loss: 2.9893 | LR: 0.0000 | Tokens: 833506


Epoch 50 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.61batch/s]


Epoch 50 | Validation Loss: 1.2729 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 51 Training:   2%|‚ñè         | 101/4718 [00:40<30:32,  2.52batch/s]


Epoch 51 | Loss: 2.9351 | LR: 0.0000 | Tokens: 849639


Epoch 51 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.41batch/s]


Epoch 51 | Validation Loss: 1.2794 | Validation Tokens: 7481


Epoch 52 Training:   2%|‚ñè         | 101/4718 [00:40<30:37,  2.51batch/s]


Epoch 52 | Loss: 3.0104 | LR: 0.0000 | Tokens: 866641


Epoch 52 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.23batch/s]


Epoch 52 | Validation Loss: 1.2988 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 53 Training:   2%|‚ñè         | 101/4718 [00:40<30:54,  2.49batch/s]


Epoch 53 | Loss: 3.0644 | LR: 0.0000 | Tokens: 884474


Epoch 53 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.29batch/s]


Epoch 53 | Validation Loss: 1.2793 | Validation Tokens: 7481


Epoch 54 Training:   2%|‚ñè         | 101/4718 [00:39<30:22,  2.53batch/s]


Epoch 54 | Loss: 2.8814 | LR: 0.0000 | Tokens: 900642


Epoch 54 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.15batch/s]


Epoch 54 | Validation Loss: 1.2777 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 55 Training:   2%|‚ñè         | 101/4718 [00:40<30:41,  2.51batch/s]


Epoch 55 | Loss: 2.9330 | LR: 0.0000 | Tokens: 917159


Epoch 55 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.34batch/s]


Epoch 55 | Validation Loss: 1.2969 | Validation Tokens: 7481


Epoch 56 Training:   2%|‚ñè         | 101/4718 [00:40<30:34,  2.52batch/s]


Epoch 56 | Loss: 3.0147 | LR: 0.0000 | Tokens: 933817


Epoch 56 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.33batch/s]


Epoch 56 | Validation Loss: 1.2867 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 57 Training:   2%|‚ñè         | 101/4718 [00:40<30:46,  2.50batch/s]


Epoch 57 | Loss: 3.0413 | LR: 0.0000 | Tokens: 951357


Epoch 57 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.66batch/s]


Epoch 57 | Validation Loss: 1.2811 | Validation Tokens: 7481


Epoch 58 Training:   2%|‚ñè         | 101/4718 [00:40<30:39,  2.51batch/s]


Epoch 58 | Loss: 2.9750 | LR: 0.0000 | Tokens: 967755


Epoch 58 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.30batch/s]


Epoch 58 | Validation Loss: 1.2756 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 59 Training:   2%|‚ñè         | 101/4718 [00:40<30:31,  2.52batch/s]


Epoch 59 | Loss: 2.9291 | LR: 0.0000 | Tokens: 985038


Epoch 59 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.55batch/s]


Epoch 59 | Validation Loss: 1.2611 | Validation Tokens: 7481
New best model found during early stopping.
Saved checkpoint to checkpoints/model_best_chexpert.pth


Epoch 60 Training:   2%|‚ñè         | 101/4718 [00:39<30:26,  2.53batch/s]


Epoch 60 | Loss: 2.9647 | LR: 0.0000 | Tokens: 1001446


Epoch 60 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.22batch/s]


Epoch 60 | Validation Loss: 1.2637 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 61 Training:   2%|‚ñè         | 101/4718 [00:39<30:10,  2.55batch/s]


Epoch 61 | Loss: 2.8717 | LR: 0.0000 | Tokens: 1017770


Epoch 61 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.24batch/s]


Epoch 61 | Validation Loss: 1.2705 | Validation Tokens: 7481


Epoch 62 Training:   2%|‚ñè         | 101/4718 [00:45<34:45,  2.21batch/s] 


Epoch 62 | Loss: 2.9806 | LR: 0.0000 | Tokens: 1035112


Epoch 62 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.33batch/s]


Epoch 62 | Validation Loss: 1.2675 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 63 Training:   2%|‚ñè         | 101/4718 [00:40<30:32,  2.52batch/s]


Epoch 63 | Loss: 2.9484 | LR: 0.0000 | Tokens: 1051567


Epoch 63 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.26batch/s]


Epoch 63 | Validation Loss: 1.2720 | Validation Tokens: 7481


Epoch 64 Training:   2%|‚ñè         | 101/4718 [00:40<30:50,  2.50batch/s]


Epoch 64 | Loss: 3.0367 | LR: 0.0000 | Tokens: 1069506


Epoch 64 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.34batch/s]


Epoch 64 | Validation Loss: 1.2867 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 65 Training:   2%|‚ñè         | 101/4718 [00:39<30:26,  2.53batch/s]


Epoch 65 | Loss: 3.0127 | LR: 0.0000 | Tokens: 1086074


Epoch 65 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.42batch/s]


Epoch 65 | Validation Loss: 1.2853 | Validation Tokens: 7481


Epoch 66 Training:   2%|‚ñè         | 101/4718 [00:40<31:08,  2.47batch/s]


Epoch 66 | Loss: 3.1409 | LR: 0.0000 | Tokens: 1103054


Epoch 66 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.23batch/s]


Epoch 66 | Validation Loss: 1.2745 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 67 Training:   2%|‚ñè         | 101/4718 [00:40<30:44,  2.50batch/s]


Epoch 67 | Loss: 2.8743 | LR: 0.0000 | Tokens: 1119803


Epoch 67 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.47batch/s]


Epoch 67 | Validation Loss: 1.2707 | Validation Tokens: 7481


Epoch 68 Training:   2%|‚ñè         | 101/4718 [00:39<30:16,  2.54batch/s]


Epoch 68 | Loss: 2.9117 | LR: 0.0000 | Tokens: 1136237


Epoch 68 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.39batch/s]


Epoch 68 | Validation Loss: 1.2691 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 69 Training:   2%|‚ñè         | 101/4718 [00:40<31:07,  2.47batch/s]


Epoch 69 | Loss: 3.0500 | LR: 0.0000 | Tokens: 1153615


Epoch 69 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.43batch/s]


Epoch 69 | Validation Loss: 1.2795 | Validation Tokens: 7481


Epoch 70 Training:   2%|‚ñè         | 101/4718 [00:40<30:53,  2.49batch/s]


Epoch 70 | Loss: 3.0745 | LR: 0.0000 | Tokens: 1171298


Epoch 70 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.20batch/s]


Epoch 70 | Validation Loss: 1.2810 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 71 Training:   2%|‚ñè         | 101/4718 [00:39<30:26,  2.53batch/s]


Epoch 71 | Loss: 3.0170 | LR: 0.0000 | Tokens: 1188269


Epoch 71 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.19batch/s]


Epoch 71 | Validation Loss: 1.2878 | Validation Tokens: 7481


Epoch 72 Training:   2%|‚ñè         | 101/4718 [00:40<30:52,  2.49batch/s]


Epoch 72 | Loss: 3.0440 | LR: 0.0000 | Tokens: 1205989


Epoch 72 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.29batch/s]


Epoch 72 | Validation Loss: 1.2787 | Validation Tokens: 7481
Saving periodic checkpoint.
Saved checkpoint to checkpoints/model_epoch_chexpert.pth


Epoch 73 Training:   2%|‚ñè         | 101/4718 [00:40<30:33,  2.52batch/s]


Epoch 73 | Loss: 2.9948 | LR: 0.0000 | Tokens: 1222435


Epoch 73 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.51batch/s]


Epoch 73 | Validation Loss: 1.2685 | Validation Tokens: 7481


Epoch 74 Training:   2%|‚ñè         | 101/4718 [00:39<30:15,  2.54batch/s]


Epoch 74 | Loss: 2.9172 | LR: 0.0000 | Tokens: 1238736


Epoch 74 Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:07<00:00,  6.54batch/s]

Epoch 74 | Validation Loss: 1.2697 | Validation Tokens: 7481
Early stopping triggered at epoch 74. Best epoch: 59 with val loss: 1.261065331697464.
🎉 Training complete.



