In [1]:
# ========================================================================
# Cell 1: Import Libraries & Setup
# ========================================================================

import os
import time
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.cuda.amp import autocast, GradScaler
from torch.utils.tensorboard import SummaryWriter

from albumentations import Compose, Resize, Normalize, HorizontalFlip, RandomBrightnessContrast
from albumentations.pytorch import ToTensorV2

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score

from efficientnet_pytorch import EfficientNet

# Setup device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"✅ Device: {device}")


✅ Device: cuda


In [2]:
# ========================================================================
# VERIFY YOUR SAVED MODEL
# ========================================================================

# Your previous experiment name
previous_experiment = "chest_xray_20251022_093109"  # From your training

# Check if model exists
model_path = f'experiments/{previous_experiment}/best_model.pth'

if os.path.exists(model_path):
    print("✅ GREAT NEWS! Your trained model was found!")
    print(f"   Location: {model_path}")
    
    # Load checkpoint info
    checkpoint = torch.load(model_path, weights_only=False)
    print(f"\n📊 Model Information:")
    print(f"   Trained for: {checkpoint['epoch']} epochs")
    print(f"   Validation AUC: {checkpoint['val_auc_macro']:.4f}")
    print(f"   Training complete: ✅")
    print("\n   YOU DON'T NEED TO RETRAIN!")
    
else:
    print("❌ Model not found!")
    print(f"   Looking for: {model_path}")
    print("\n   Please check the experiment folder name.")


✅ GREAT NEWS! Your trained model was found!
   Location: experiments/chest_xray_20251022_093109/best_model.pth

📊 Model Information:
   Trained for: 5 epochs
   Validation AUC: 0.8024
   Training complete: ✅

   YOU DON'T NEED TO RETRAIN!


In [3]:
# ========================================================================
# REBUILD MODEL AND LOAD TRAINED WEIGHTS
# ========================================================================

class ChestXrayClassifier(nn.Module):
    def __init__(self, num_classes=14, pretrained=True):
        super(ChestXrayClassifier, self).__init__()
        
        if pretrained:
            self.backbone = EfficientNet.from_pretrained('efficientnet-b0')
        else:
            self.backbone = EfficientNet.from_name('efficientnet-b0')
        
        num_features = self.backbone._fc.in_features
        self.backbone._fc = nn.Sequential(
            nn.Dropout(0.3),
            nn.Linear(num_features, num_classes)
        )
    
    def forward(self, x):
        return self.backbone(x)

# Create model
model = ChestXrayClassifier(num_classes=14, pretrained=False)

# Load trained weights
checkpoint = torch.load(model_path, weights_only=False)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
model.eval()

print("="*70)
print("✅ MODEL LOADED SUCCESSFULLY!")
print("="*70)
print(f"Model: EfficientNet-B0")
print(f"Trained AUC: {checkpoint['val_auc_macro']:.4f}")
print(f"Epoch: {checkpoint['epoch']}")
print(f"Ready for: Next phase training")
print("="*70)


✅ MODEL LOADED SUCCESSFULLY!
Model: EfficientNet-B0
Trained AUC: 0.8024
Epoch: 5
Ready for: Next phase training


In [4]:
# ========================================================================
# PREPARE FOR 224x224 TRAINING (BETTER QUALITY)
# ========================================================================

print("\n" + "🚀 "*35)
print("PHASE 2: HIGH-RESOLUTION TRAINING (224x224)")
print("🚀 "*35 + "\n")

print("="*70)
print("WHY 224x224?")
print("="*70)
print("  • Your current model: 128x128 → 76.41% AUC")
print("  • Expected with 224x224: 78-79% AUC (+2-3%)")
print("  • Standard in medical imaging research")
print("  • Better detail preservation")
print("  • Still fits in 8GB GPU memory")
print("="*70)

# Load dataset paths
extract_path = "./chest_xray_data/"  # UPDATE if different
data_entry = pd.read_csv(os.path.join(extract_path, "Data_Entry_2017.csv"))

# Prepare splits (same as before)
train_list_path = os.path.join(extract_path, "train_val_list.txt")
test_list_path = os.path.join(extract_path, "test_list.txt")

if os.path.exists(train_list_path) and os.path.exists(test_list_path):
    train_list = pd.read_csv(train_list_path, header=None)[0].tolist()
    test_list = pd.read_csv(test_list_path, header=None)[0].tolist()
    train_val_data = data_entry[data_entry['Image Index'].isin(train_list)].reset_index(drop=True)
    test_data = data_entry[data_entry['Image Index'].isin(test_list)].reset_index(drop=True)
else:
    train_val_data, test_data = train_test_split(data_entry, test_size=0.3, random_state=42)

train_data, val_data = train_test_split(train_val_data, test_size=0.2, random_state=42)

print(f"\n✅ Dataset loaded:")
print(f"   Train:      {len(train_data):,} images")
print(f"   Validation: {len(val_data):,} images")
print(f"   Test:       {len(test_data):,} images")



🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 
PHASE 2: HIGH-RESOLUTION TRAINING (224x224)
🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 🚀 

WHY 224x224?
  • Your current model: 128x128 → 76.41% AUC
  • Expected with 224x224: 78-79% AUC (+2-3%)
  • Standard in medical imaging research
  • Better detail preservation
  • Still fits in 8GB GPU memory

✅ Dataset loaded:
   Train:      69,219 images
   Validation: 17,305 images
   Test:       25,596 images


In [5]:
# ========================================================================
# 224x224 TRANSFORMS (HIGHER RESOLUTION)
# ========================================================================

IMAGE_SIZE = 224  # INCREASED from 128

train_transforms_hires = Compose([
    Resize(IMAGE_SIZE, IMAGE_SIZE),
    HorizontalFlip(p=0.5),
    RandomBrightnessContrast(brightness_limit=0.15, contrast_limit=0.15, p=0.3),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

val_transforms_hires = Compose([
    Resize(IMAGE_SIZE, IMAGE_SIZE),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

print("="*70)
print("HIGH-RESOLUTION CONFIGURATION")
print("="*70)
print(f"Image size:   {IMAGE_SIZE}x{IMAGE_SIZE} (was 128x128)")
print(f"Improvement:  3x more pixels per image")
print(f"Benefit:      Better detail, clearer pathology features")
print("="*70)


HIGH-RESOLUTION CONFIGURATION
Image size:   224x224 (was 128x128)
Improvement:  3x more pixels per image
Benefit:      Better detail, clearer pathology features


In [6]:
# ========================================================================
# DATASET CLASS (Same as before)
# ========================================================================

class ChestXrayDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None, num_classes=14):
        self.dataframe = dataframe.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform
        self.num_classes = num_classes
        
        self.pathologies = [
            'Atelectasis', 'Consolidation', 'Infiltration', 
            'Pneumothorax', 'Edema', 'Emphysema', 'Fibrosis', 
            'Effusion', 'Pneumonia', 'Pleural_Thickening', 
            'Cardiomegaly', 'Nodule', 'Mass', 'Hernia'
        ]
    
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        img_name = self.dataframe.iloc[idx]['Image Index']
        
        possible_paths = [
            os.path.join(self.img_dir, 'images', img_name),
            os.path.join(self.img_dir, img_name),
        ]
        
        for i in range(1, 13):
            possible_paths.append(
                os.path.join(self.img_dir, f'images_{i:03d}', 'images', img_name)
            )
        
        img_path = None
        for path in possible_paths:
            if os.path.exists(path):
                img_path = path
                break
        
        if img_path is None:
            raise FileNotFoundError(f"Image not found: {img_name}")
        
        image = Image.open(img_path).convert('RGB')
        image = np.array(image)
        
        labels_str = self.dataframe.iloc[idx]['Finding Labels']
        labels = np.zeros(self.num_classes, dtype=np.float32)
        
        if labels_str != 'No Finding':
            for i, pathology in enumerate(self.pathologies):
                if pathology in labels_str:
                    labels[i] = 1.0
        
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        
        return image, torch.tensor(labels)

# ========================================================================
# CREATE DATALOADERS WITH 224x224 IMAGES
# ========================================================================

BATCH_SIZE = 32  # Reduced from 64 due to larger images
NUM_WORKERS = 0

train_dataset = ChestXrayDataset(train_data, img_dir=extract_path, transform=train_transforms_hires)
val_dataset = ChestXrayDataset(val_data, img_dir=extract_path, transform=val_transforms_hires)
test_dataset = ChestXrayDataset(test_data, img_dir=extract_path, transform=val_transforms_hires)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

print(f"\n✅ DataLoaders created:")
print(f"   Image size:    {IMAGE_SIZE}x{IMAGE_SIZE}")
print(f"   Batch size:    {BATCH_SIZE} (reduced for memory)")
print(f"   Train batches: {len(train_loader):,}")
print(f"   Val batches:   {len(val_loader):,}")



✅ DataLoaders created:
   Image size:    224x224
   Batch size:    32 (reduced for memory)
   Train batches: 2,164
   Val batches:   541


In [7]:
# ========================================================================
# CALCULATE CLASS WEIGHTS
# ========================================================================

def calculate_class_weights(dataframe, num_classes=14):
    pathologies = [
        'Atelectasis', 'Consolidation', 'Infiltration', 
        'Pneumothorax', 'Edema', 'Emphysema', 'Fibrosis', 
        'Effusion', 'Pneumonia', 'Pleural_Thickening', 
        'Cardiomegaly', 'Nodule', 'Mass', 'Hernia'
    ]
    
    total_samples = len(dataframe)
    pos_weights = []
    
    for pathology in pathologies:
        pos_count = dataframe['Finding Labels'].str.contains(pathology, regex=False).sum()
        neg_count = total_samples - pos_count
        weight = neg_count / pos_count if pos_count > 0 else 1.0
        pos_weights.append(weight)
    
    return torch.tensor(pos_weights, dtype=torch.float32)

class_weights = calculate_class_weights(train_data)
class_weights = class_weights.to(device)

print("✅ Class weights calculated")


✅ Class weights calculated


In [8]:
# ========================================================================
# INITIALIZE NEW MODEL FOR HIGH-RESOLUTION TRAINING
# ========================================================================

# Create fresh model (will train from ImageNet weights again)
model_hires = ChestXrayClassifier(num_classes=14, pretrained=True)
model_hires = model_hires.to(device)

# Training setup
criterion = nn.BCEWithLogitsLoss(pos_weight=class_weights)
optimizer = AdamW(model_hires.parameters(), lr=0.0001, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3, min_lr=1e-7)
scaler = GradScaler()

# Training parameters
NUM_EPOCHS = 30
EARLY_STOPPING_PATIENCE = 7

# New experiment
experiment_name_hires = f"chest_xray_224x224_{time.strftime('%Y%m%d_%H%M%S')}"
os.makedirs(f'experiments/{experiment_name_hires}', exist_ok=True)
writer = SummaryWriter(f'experiments/{experiment_name_hires}/logs')

config_hires = {
    'model': 'EfficientNet-B0',
    'image_size': IMAGE_SIZE,
    'batch_size': BATCH_SIZE,
    'previous_model': '128x128 (76.41% AUC)',
    'target': '78-79% AUC',
    'improvement_expected': '+2-3%'
}

with open(f'experiments/{experiment_name_hires}/config.json', 'w') as f:
    json.dump(config_hires, f, indent=4)

print("="*70)
print("NEW MODEL INITIALIZED FOR 224x224 TRAINING")
print("="*70)
print(f"Previous model: 128x128 → 76.41% test AUC")
print(f"Target:         224x224 → 78-79% test AUC")
print(f"Expected gain:  +2-3% AUC")
print(f"Training time:  ~20-24 hours")
print("="*70)


Loaded pretrained weights for efficientnet-b0
NEW MODEL INITIALIZED FOR 224x224 TRAINING
Previous model: 128x128 → 76.41% test AUC
Target:         224x224 → 78-79% test AUC
Expected gain:  +2-3% AUC
Training time:  ~20-24 hours


  scaler = GradScaler()


In [9]:
# ========================================================================
# TRAINING FUNCTIONS
# ========================================================================

def train_epoch(model, loader, criterion, optimizer, device, epoch, scaler):
    model.train()
    running_loss = 0.0
    all_labels = []
    all_predictions = []
    
    print(f"\n🔄 Training Epoch {epoch+1}")
    epoch_start = time.time()
    
    for batch_idx, (images, labels) in enumerate(loader):
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        
        optimizer.zero_grad(set_to_none=True)
        
        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)
        
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        scaler.step(optimizer)
        scaler.update()
        
        running_loss += loss.item()
        
        if batch_idx % 5 == 0:
            all_labels.append(labels.cpu().numpy())
            predictions = torch.sigmoid(outputs).detach().cpu().numpy()
            all_predictions.append(predictions)
        
        if (batch_idx + 1) % 50 == 0:
            avg_loss = running_loss / (batch_idx + 1)
            elapsed = time.time() - epoch_start
            speed = (batch_idx + 1) / elapsed
            eta = (len(loader) - batch_idx - 1) / speed / 60
            print(f"  [{batch_idx+1}/{len(loader)}] Loss: {avg_loss:.4f} | "
                  f"{speed:.2f} batch/s | ETA: {eta:.1f}min")
    
    if len(all_labels) > 0:
        all_labels = np.vstack(all_labels)
        all_predictions = np.vstack(all_predictions)
        avg_loss = running_loss / len(loader)
        try:
            auc_macro = roc_auc_score(all_labels, all_predictions, average='macro')
            auc_weighted = roc_auc_score(all_labels, all_predictions, average='weighted')
        except:
            auc_macro = 0.0
            auc_weighted = 0.0
    else:
        avg_loss = running_loss / len(loader)
        auc_macro = 0.0
        auc_weighted = 0.0
    
    return avg_loss, auc_macro, auc_weighted


def validate_epoch(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    all_labels = []
    all_predictions = []
    
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            
            with autocast():
                outputs = model(images)
                loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            all_labels.append(labels.cpu().numpy())
            predictions = torch.sigmoid(outputs).cpu().numpy()
            all_predictions.append(predictions)
    
    all_labels = np.vstack(all_labels)
    all_predictions = np.vstack(all_predictions)
    avg_loss = running_loss / len(loader)
    
    auc_macro = roc_auc_score(all_labels, all_predictions, average='macro')
    auc_weighted = roc_auc_score(all_labels, all_predictions, average='weighted')
    
    per_class_auc = []
    for i in range(all_labels.shape[1]):
        try:
            auc = roc_auc_score(all_labels[:, i], all_predictions[:, i])
            per_class_auc.append(auc)
        except:
            per_class_auc.append(0.0)
    
    return avg_loss, auc_macro, auc_weighted, per_class_auc

print("✅ Training functions ready")


✅ Training functions ready


In [10]:
# ========================================================================
# START 224x224 HIGH-RESOLUTION TRAINING
# ========================================================================

def train_model_hires():
    best_val_auc = 0.0
    early_stop_counter = 0
    training_start_time = time.time()
    
    pathologies = [
        'Atelectasis', 'Consolidation', 'Infiltration', 
        'Pneumothorax', 'Edema', 'Emphysema', 'Fibrosis', 
        'Effusion', 'Pneumonia', 'Pleural_Thickening', 
        'Cardiomegaly', 'Nodule', 'Mass', 'Hernia'
    ]
    
    print("\n" + "="*70)
    print("🚀 STARTING 224x224 HIGH-RESOLUTION TRAINING")
    print("="*70)
    print(f"Previous model (128x128): 76.41% test AUC")
    print(f"Target (224x224):         78-79% test AUC")
    print(f"Expected training time:   20-24 hours")
    print("="*70)
    
    for epoch in range(NUM_EPOCHS):
        epoch_start = time.time()
        
        print(f"\n{'='*70}")
        print(f"EPOCH [{epoch+1}/{NUM_EPOCHS}]")
        print(f"{'='*70}")
        
        # Train
        train_loss, train_auc_macro, train_auc_weighted = train_epoch(
            model_hires, train_loader, criterion, optimizer, device, epoch, scaler
        )
        
        # Validate
        val_loss, val_auc_macro, val_auc_weighted, per_class_auc = validate_epoch(
            model_hires, val_loader, criterion, device
        )
        
        # Update LR
        old_lr = optimizer.param_groups[0]['lr']
        scheduler.step(val_auc_macro)
        current_lr = optimizer.param_groups[0]['lr']
        
        if old_lr != current_lr:
            print(f"\n📉 LR: {old_lr:.2e} → {current_lr:.2e}")
        
        epoch_time = time.time() - epoch_start
        total_elapsed = (time.time() - training_start_time) / 3600
        
        # Log
        writer.add_scalar('Loss/train', train_loss, epoch)
        writer.add_scalar('Loss/val', val_loss, epoch)
        writer.add_scalar('AUC/train', train_auc_macro, epoch)
        writer.add_scalar('AUC/val', val_auc_macro, epoch)
        writer.add_scalar('LR', current_lr, epoch)
        
        # Print
        print(f"\n📊 Results:")
        print(f"  Train: Loss={train_loss:.4f}, AUC={train_auc_macro:.4f}")
        print(f"  Val:   Loss={val_loss:.4f}, AUC={val_auc_macro:.4f}")
        print(f"  Time: {epoch_time/60:.1f}min | Total: {total_elapsed:.2f}hrs")
        
        # Compare with previous model
        improvement_vs_128 = val_auc_macro - 0.8024  # Your 128x128 val AUC
        print(f"  vs 128x128: {improvement_vs_128:+.4f} ({improvement_vs_128*100:+.2f}%)")
        
        if epoch > 0:
            avg_time = (time.time() - training_start_time) / (epoch + 1)
            eta = (avg_time * (NUM_EPOCHS - epoch - 1)) / 3600
            print(f"  ETA: {eta:.2f} hours")
        
        # Save best
        if val_auc_macro > best_val_auc:
            best_val_auc = val_auc_macro
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model_hires.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_auc_macro': val_auc_macro,
                'per_class_auc': per_class_auc,
                'config': config_hires
            }, f'experiments/{experiment_name_hires}/best_model.pth')
            print(f"\n  ✅ Best model saved! AUC: {best_val_auc:.4f}")
            early_stop_counter = 0
        else:
            early_stop_counter += 1
            print(f"\n  ⏳ No improvement ({early_stop_counter}/{EARLY_STOPPING_PATIENCE})")
        
        if early_stop_counter >= EARLY_STOPPING_PATIENCE:
            print(f"\n⚠️ Early stopping at epoch {epoch+1}")
            break
    
    total_time = time.time() - training_start_time
    print("\n" + "="*70)
    print("✅ 224x224 TRAINING COMPLETE!")
    print(f"Best AUC: {best_val_auc:.4f}")
    print(f"Time: {total_time/3600:.2f} hours")
    print(f"Improvement over 128x128: {(best_val_auc-0.8024)*100:+.2f}%")
    print("="*70)
    
    writer.close()
    return best_val_auc

# START TRAINING
print(f"\n⏰ Starting: {time.strftime('%I:%M %p IST')}")
print(f"⏰ Expected: ~{time.strftime('%I:%M %p', time.localtime(time.time() + 86400))} IST (tomorrow)")

best_auc_hires = train_model_hires()



⏰ Starting: 10:12 AM IST
⏰ Expected: ~10:12 AM IST (tomorrow)

🚀 STARTING 224x224 HIGH-RESOLUTION TRAINING
Previous model (128x128): 76.41% test AUC
Target (224x224):         78-79% test AUC
Expected training time:   20-24 hours

EPOCH [1/30]

🔄 Training Epoch 1


  with autocast():


  [50/2164] Loss: 1.3224 | 0.35 batch/s | ETA: 101.4min
  [100/2164] Loss: 1.3472 | 0.35 batch/s | ETA: 99.1min
  [150/2164] Loss: 1.3238 | 0.35 batch/s | ETA: 96.2min
  [200/2164] Loss: 1.3261 | 0.35 batch/s | ETA: 93.7min
  [250/2164] Loss: 1.3257 | 0.35 batch/s | ETA: 91.0min
  [300/2164] Loss: 1.3332 | 0.35 batch/s | ETA: 88.7min
  [350/2164] Loss: 1.3169 | 0.35 batch/s | ETA: 85.8min
  [400/2164] Loss: 1.3117 | 0.35 batch/s | ETA: 84.0min
  [450/2164] Loss: 1.3042 | 0.34 batch/s | ETA: 84.5min
  [500/2164] Loss: 1.2902 | 0.34 batch/s | ETA: 80.5min
  [550/2164] Loss: 1.2862 | 0.36 batch/s | ETA: 75.4min
  [600/2164] Loss: 1.2688 | 0.37 batch/s | ETA: 70.8min
  [650/2164] Loss: 1.2740 | 0.38 batch/s | ETA: 67.0min
  [700/2164] Loss: 1.2699 | 0.39 batch/s | ETA: 63.3min
  [750/2164] Loss: 1.2542 | 0.39 batch/s | ETA: 60.0min
  [800/2164] Loss: 1.2461 | 0.40 batch/s | ETA: 56.9min
  [850/2164] Loss: 1.2322 | 0.41 batch/s | ETA: 53.9min
  [900/2164] Loss: 1.2318 | 0.41 batch/s | ETA: 

  with autocast():



📊 Results:
  Train: Loss=1.1889, AUC=0.7433
  Val:   Loss=1.1609, AUC=0.7956
  Time: 92.0min | Total: 1.53hrs
  vs 128x128: -0.0068 (-0.68%)

  ✅ Best model saved! AUC: 0.7956

EPOCH [2/30]

🔄 Training Epoch 2


  with autocast():


  [50/2164] Loss: 1.0430 | 0.55 batch/s | ETA: 64.6min
  [100/2164] Loss: 0.9800 | 0.55 batch/s | ETA: 62.6min
  [150/2164] Loss: 1.0067 | 0.55 batch/s | ETA: 61.2min
  [200/2164] Loss: 1.1152 | 0.55 batch/s | ETA: 59.9min
  [250/2164] Loss: 1.0763 | 0.54 batch/s | ETA: 58.8min
  [300/2164] Loss: 1.0711 | 0.54 batch/s | ETA: 57.6min
  [350/2164] Loss: 1.0859 | 0.54 batch/s | ETA: 56.0min
  [400/2164] Loss: 1.0678 | 0.54 batch/s | ETA: 54.5min
  [450/2164] Loss: 1.0830 | 0.54 batch/s | ETA: 53.0min
  [500/2164] Loss: 1.0998 | 0.54 batch/s | ETA: 51.4min
  [550/2164] Loss: 1.0903 | 0.54 batch/s | ETA: 49.9min
  [600/2164] Loss: 1.0924 | 0.54 batch/s | ETA: 48.4min
  [650/2164] Loss: 1.0840 | 0.54 batch/s | ETA: 46.9min
  [700/2164] Loss: 1.0799 | 0.54 batch/s | ETA: 45.3min
  [750/2164] Loss: 1.0704 | 0.54 batch/s | ETA: 43.7min
  [800/2164] Loss: 1.0665 | 0.54 batch/s | ETA: 42.2min
  [850/2164] Loss: 1.0652 | 0.54 batch/s | ETA: 40.7min
  [900/2164] Loss: 1.0614 | 0.54 batch/s | ETA: 3

  with autocast():



📊 Results:
  Train: Loss=1.0604, AUC=0.8044
  Val:   Loss=1.1231, AUC=0.8135
  Time: 83.7min | Total: 2.93hrs
  vs 128x128: +0.0111 (+1.11%)
  ETA: 40.98 hours

  ✅ Best model saved! AUC: 0.8135

EPOCH [3/30]

🔄 Training Epoch 3


  with autocast():


  [50/2164] Loss: 0.9043 | 0.55 batch/s | ETA: 64.1min
  [100/2164] Loss: 0.9662 | 0.54 batch/s | ETA: 63.3min
  [150/2164] Loss: 0.9559 | 0.54 batch/s | ETA: 61.8min
  [200/2164] Loss: 0.9475 | 0.54 batch/s | ETA: 60.4min
  [250/2164] Loss: 0.9389 | 0.54 batch/s | ETA: 58.9min
  [300/2164] Loss: 0.9354 | 0.54 batch/s | ETA: 57.7min
  [350/2164] Loss: 0.9507 | 0.54 batch/s | ETA: 56.3min
  [400/2164] Loss: 0.9542 | 0.54 batch/s | ETA: 54.8min
  [450/2164] Loss: 0.9612 | 0.54 batch/s | ETA: 53.2min
  [500/2164] Loss: 0.9572 | 0.54 batch/s | ETA: 51.7min
  [550/2164] Loss: 0.9491 | 0.54 batch/s | ETA: 50.1min
  [600/2164] Loss: 0.9453 | 0.54 batch/s | ETA: 48.5min
  [650/2164] Loss: 0.9392 | 0.54 batch/s | ETA: 47.0min
  [700/2164] Loss: 0.9434 | 0.54 batch/s | ETA: 45.5min
  [750/2164] Loss: 0.9386 | 0.54 batch/s | ETA: 43.9min
  [800/2164] Loss: 0.9390 | 0.54 batch/s | ETA: 42.4min
  [850/2164] Loss: 0.9386 | 0.54 batch/s | ETA: 40.8min
  [900/2164] Loss: 0.9406 | 0.54 batch/s | ETA: 3

  with autocast():



📊 Results:
  Train: Loss=0.9827, AUC=0.8408
  Val:   Loss=1.0591, AUC=0.8238
  Time: 83.5min | Total: 4.32hrs
  vs 128x128: +0.0214 (+2.14%)
  ETA: 38.87 hours

  ✅ Best model saved! AUC: 0.8238

EPOCH [4/30]

🔄 Training Epoch 4


  with autocast():


  [50/2164] Loss: 0.8516 | 0.54 batch/s | ETA: 65.4min
  [100/2164] Loss: 0.8431 | 0.55 batch/s | ETA: 63.1min
  [150/2164] Loss: 0.8827 | 0.54 batch/s | ETA: 61.8min
  [200/2164] Loss: 0.8923 | 0.54 batch/s | ETA: 60.4min
  [250/2164] Loss: 0.8997 | 0.54 batch/s | ETA: 59.5min
  [300/2164] Loss: 0.9113 | 0.54 batch/s | ETA: 58.0min
  [350/2164] Loss: 0.9126 | 0.53 batch/s | ETA: 56.7min
  [400/2164] Loss: 0.9131 | 0.54 batch/s | ETA: 54.9min
  [450/2164] Loss: 0.9067 | 0.54 batch/s | ETA: 53.3min
  [500/2164] Loss: 0.9032 | 0.54 batch/s | ETA: 51.7min
  [550/2164] Loss: 0.8947 | 0.53 batch/s | ETA: 50.7min
  [600/2164] Loss: 0.8874 | 0.50 batch/s | ETA: 51.7min
  [650/2164] Loss: 0.9025 | 0.48 batch/s | ETA: 52.7min
  [700/2164] Loss: 0.8975 | 0.47 batch/s | ETA: 52.4min
  [750/2164] Loss: 0.9075 | 0.45 batch/s | ETA: 52.0min
  [800/2164] Loss: 0.9041 | 0.44 batch/s | ETA: 51.3min
  [850/2164] Loss: 0.9002 | 0.43 batch/s | ETA: 50.6min
  [900/2164] Loss: 0.8969 | 0.43 batch/s | ETA: 4

  with autocast():



📊 Results:
  Train: Loss=0.9172, AUC=0.8543
  Val:   Loss=1.1489, AUC=0.8265
  Time: 110.7min | Total: 6.16hrs
  vs 128x128: +0.0241 (+2.41%)
  ETA: 40.07 hours

  ✅ Best model saved! AUC: 0.8265

EPOCH [5/30]

🔄 Training Epoch 5


  with autocast():


  [50/2164] Loss: 0.7786 | 0.44 batch/s | ETA: 80.8min
  [100/2164] Loss: 0.7972 | 0.43 batch/s | ETA: 80.3min
  [150/2164] Loss: 0.7900 | 0.44 batch/s | ETA: 76.9min
  [200/2164] Loss: 0.7871 | 0.43 batch/s | ETA: 75.7min
  [250/2164] Loss: 0.8063 | 0.43 batch/s | ETA: 74.8min
  [300/2164] Loss: 0.8055 | 0.43 batch/s | ETA: 72.8min
  [350/2164] Loss: 0.8081 | 0.43 batch/s | ETA: 71.0min
  [400/2164] Loss: 0.8081 | 0.43 batch/s | ETA: 68.5min
  [450/2164] Loss: 0.8152 | 0.44 batch/s | ETA: 65.5min
  [500/2164] Loss: 0.8052 | 0.44 batch/s | ETA: 63.2min
  [550/2164] Loss: 0.8090 | 0.44 batch/s | ETA: 61.2min
  [600/2164] Loss: 0.8073 | 0.45 batch/s | ETA: 58.4min
  [650/2164] Loss: 0.8035 | 0.45 batch/s | ETA: 56.6min
  [700/2164] Loss: 0.8055 | 0.45 batch/s | ETA: 54.6min
  [750/2164] Loss: 0.8083 | 0.45 batch/s | ETA: 52.8min
  [800/2164] Loss: 0.8085 | 0.44 batch/s | ETA: 51.1min
  [850/2164] Loss: 0.8208 | 0.44 batch/s | ETA: 49.3min
  [900/2164] Loss: 0.8174 | 0.44 batch/s | ETA: 4

  with autocast():



📊 Results:
  Train: Loss=0.8324, AUC=0.8680
  Val:   Loss=1.1965, AUC=0.8224
  Time: 103.1min | Total: 7.88hrs
  vs 128x128: +0.0200 (+2.00%)
  ETA: 39.41 hours

  ⏳ No improvement (1/7)

EPOCH [6/30]

🔄 Training Epoch 6


  with autocast():


  [50/2164] Loss: 0.7006 | 0.45 batch/s | ETA: 77.9min
  [100/2164] Loss: 0.7356 | 0.46 batch/s | ETA: 74.8min
  [150/2164] Loss: 0.7376 | 0.46 batch/s | ETA: 73.2min
  [200/2164] Loss: 0.7700 | 0.46 batch/s | ETA: 71.7min
  [250/2164] Loss: 0.7620 | 0.45 batch/s | ETA: 70.2min
  [300/2164] Loss: 0.7707 | 0.45 batch/s | ETA: 68.5min
  [350/2164] Loss: 0.7750 | 0.45 batch/s | ETA: 67.2min
  [400/2164] Loss: 0.7688 | 0.45 batch/s | ETA: 65.9min
  [450/2164] Loss: 0.7800 | 0.44 batch/s | ETA: 64.3min
  [500/2164] Loss: 0.7751 | 0.44 batch/s | ETA: 62.6min
  [550/2164] Loss: 0.7777 | 0.44 batch/s | ETA: 60.7min
  [600/2164] Loss: 0.7676 | 0.44 batch/s | ETA: 58.8min
  [650/2164] Loss: 0.7719 | 0.44 batch/s | ETA: 57.0min
  [700/2164] Loss: 0.7840 | 0.44 batch/s | ETA: 55.2min
  [750/2164] Loss: 0.7797 | 0.44 batch/s | ETA: 53.4min
  [800/2164] Loss: 0.7775 | 0.44 batch/s | ETA: 51.5min
  [850/2164] Loss: 0.7759 | 0.44 batch/s | ETA: 49.6min
  [900/2164] Loss: 0.7749 | 0.44 batch/s | ETA: 4

  with autocast():



📊 Results:
  Train: Loss=0.7648, AUC=0.8835
  Val:   Loss=1.3413, AUC=0.8209
  Time: 92.6min | Total: 9.43hrs
  vs 128x128: +0.0185 (+1.85%)
  ETA: 37.70 hours

  ⏳ No improvement (2/7)

EPOCH [7/30]

🔄 Training Epoch 7


  with autocast():


  [50/2164] Loss: 0.6789 | 0.74 batch/s | ETA: 47.9min
  [100/2164] Loss: 0.6713 | 0.73 batch/s | ETA: 47.3min
  [150/2164] Loss: 0.7005 | 0.72 batch/s | ETA: 46.5min
  [200/2164] Loss: 0.6886 | 0.72 batch/s | ETA: 45.5min
  [250/2164] Loss: 0.7091 | 0.71 batch/s | ETA: 45.1min
  [300/2164] Loss: 0.7257 | 0.70 batch/s | ETA: 44.4min
  [350/2164] Loss: 0.7256 | 0.70 batch/s | ETA: 43.4min
  [400/2164] Loss: 0.7192 | 0.69 batch/s | ETA: 42.3min
  [450/2164] Loss: 0.7163 | 0.69 batch/s | ETA: 41.3min
  [500/2164] Loss: 0.7181 | 0.69 batch/s | ETA: 40.1min
  [550/2164] Loss: 0.7119 | 0.69 batch/s | ETA: 38.9min
  [600/2164] Loss: 0.7121 | 0.69 batch/s | ETA: 37.5min
  [650/2164] Loss: 0.7106 | 0.70 batch/s | ETA: 36.3min
  [700/2164] Loss: 0.7105 | 0.70 batch/s | ETA: 35.1min
  [750/2164] Loss: 0.7050 | 0.70 batch/s | ETA: 33.8min
  [800/2164] Loss: 0.7024 | 0.69 batch/s | ETA: 33.0min
  [850/2164] Loss: 0.7019 | 0.68 batch/s | ETA: 32.3min
  [900/2164] Loss: 0.7003 | 0.66 batch/s | ETA: 3

  with autocast():



📊 Results:
  Train: Loss=0.7048, AUC=0.8975
  Val:   Loss=1.5830, AUC=0.8184
  Time: 69.8min | Total: 10.59hrs
  vs 128x128: +0.0160 (+1.60%)
  ETA: 34.79 hours

  ⏳ No improvement (3/7)

EPOCH [8/30]

🔄 Training Epoch 8


  with autocast():


  [50/2164] Loss: 0.6224 | 0.68 batch/s | ETA: 52.0min
  [100/2164] Loss: 0.6089 | 0.67 batch/s | ETA: 51.0min
  [150/2164] Loss: 0.6083 | 0.68 batch/s | ETA: 49.4min
  [200/2164] Loss: 0.6202 | 0.68 batch/s | ETA: 48.1min
  [250/2164] Loss: 0.6134 | 0.68 batch/s | ETA: 47.0min
  [300/2164] Loss: 0.6186 | 0.68 batch/s | ETA: 45.9min
  [350/2164] Loss: 0.6175 | 0.68 batch/s | ETA: 44.8min
  [400/2164] Loss: 0.6118 | 0.67 batch/s | ETA: 43.6min
  [450/2164] Loss: 0.6173 | 0.67 batch/s | ETA: 42.4min
  [500/2164] Loss: 0.6192 | 0.67 batch/s | ETA: 41.2min
  [550/2164] Loss: 0.6201 | 0.67 batch/s | ETA: 40.2min
  [600/2164] Loss: 0.6235 | 0.67 batch/s | ETA: 38.8min
  [650/2164] Loss: 0.6367 | 0.67 batch/s | ETA: 37.6min
  [700/2164] Loss: 0.6427 | 0.67 batch/s | ETA: 36.4min
  [750/2164] Loss: 0.6433 | 0.67 batch/s | ETA: 35.2min
  [800/2164] Loss: 0.6457 | 0.67 batch/s | ETA: 33.9min
  [850/2164] Loss: 0.6478 | 0.67 batch/s | ETA: 32.7min
  [900/2164] Loss: 0.6471 | 0.67 batch/s | ETA: 3

  with autocast():



📉 LR: 1.00e-04 → 5.00e-05

📊 Results:
  Train: Loss=0.6596, AUC=0.9074
  Val:   Loss=1.8609, AUC=0.8124
  Time: 81.4min | Total: 11.95hrs
  vs 128x128: +0.0100 (+1.00%)
  ETA: 32.85 hours

  ⏳ No improvement (4/7)

EPOCH [9/30]

🔄 Training Epoch 9


  with autocast():


  [50/2164] Loss: 0.5522 | 0.75 batch/s | ETA: 47.2min
  [100/2164] Loss: 0.5587 | 0.75 batch/s | ETA: 46.0min
  [150/2164] Loss: 0.5796 | 0.74 batch/s | ETA: 45.3min
  [200/2164] Loss: 0.5746 | 0.74 batch/s | ETA: 44.5min
  [250/2164] Loss: 0.5762 | 0.73 batch/s | ETA: 43.5min
  [300/2164] Loss: 0.5766 | 0.73 batch/s | ETA: 42.6min
  [350/2164] Loss: 0.5882 | 0.72 batch/s | ETA: 41.7min
  [400/2164] Loss: 0.5836 | 0.72 batch/s | ETA: 40.9min
  [450/2164] Loss: 0.5789 | 0.72 batch/s | ETA: 39.9min
  [500/2164] Loss: 0.5807 | 0.71 batch/s | ETA: 38.9min
  [550/2164] Loss: 0.5815 | 0.71 batch/s | ETA: 37.8min
  [600/2164] Loss: 0.5778 | 0.71 batch/s | ETA: 36.6min
  [650/2164] Loss: 0.5776 | 0.71 batch/s | ETA: 35.5min
  [700/2164] Loss: 0.5791 | 0.71 batch/s | ETA: 34.5min
  [750/2164] Loss: 0.5791 | 0.71 batch/s | ETA: 33.3min
  [800/2164] Loss: 0.5776 | 0.71 batch/s | ETA: 32.1min
  [850/2164] Loss: 0.5761 | 0.71 batch/s | ETA: 31.0min
  [900/2164] Loss: 0.5748 | 0.71 batch/s | ETA: 2

  with autocast():



📊 Results:
  Train: Loss=0.5778, AUC=0.9221
  Val:   Loss=2.0247, AUC=0.8089
  Time: 63.6min | Total: 13.01hrs
  vs 128x128: +0.0065 (+0.65%)
  ETA: 30.35 hours

  ⏳ No improvement (5/7)

EPOCH [10/30]

🔄 Training Epoch 10


  with autocast():


  [50/2164] Loss: 0.4979 | 0.73 batch/s | ETA: 48.5min
  [100/2164] Loss: 0.5279 | 0.72 batch/s | ETA: 47.5min
  [150/2164] Loss: 0.5412 | 0.73 batch/s | ETA: 45.9min
  [200/2164] Loss: 0.5354 | 0.73 batch/s | ETA: 44.9min
  [250/2164] Loss: 0.5294 | 0.72 batch/s | ETA: 44.1min
  [300/2164] Loss: 0.5247 | 0.72 batch/s | ETA: 43.0min
  [350/2164] Loss: 0.5285 | 0.72 batch/s | ETA: 41.8min
  [400/2164] Loss: 0.5272 | 0.72 batch/s | ETA: 40.8min
  [450/2164] Loss: 0.5221 | 0.72 batch/s | ETA: 39.6min
  [500/2164] Loss: 0.5201 | 0.72 batch/s | ETA: 38.4min
  [550/2164] Loss: 0.5182 | 0.72 batch/s | ETA: 37.4min
  [600/2164] Loss: 0.5186 | 0.72 batch/s | ETA: 36.3min
  [650/2164] Loss: 0.5172 | 0.72 batch/s | ETA: 35.1min
  [700/2164] Loss: 0.5179 | 0.72 batch/s | ETA: 34.0min
  [750/2164] Loss: 0.5174 | 0.72 batch/s | ETA: 32.8min
  [800/2164] Loss: 0.5179 | 0.72 batch/s | ETA: 31.7min
  [850/2164] Loss: 0.5191 | 0.72 batch/s | ETA: 30.5min
  [900/2164] Loss: 0.5188 | 0.72 batch/s | ETA: 2

  with autocast():



📊 Results:
  Train: Loss=0.5398, AUC=0.9291
  Val:   Loss=2.1835, AUC=0.8070
  Time: 65.0min | Total: 14.09hrs
  vs 128x128: +0.0046 (+0.46%)
  ETA: 28.18 hours

  ⏳ No improvement (6/7)

EPOCH [11/30]

🔄 Training Epoch 11


  with autocast():


  [50/2164] Loss: 0.4736 | 0.48 batch/s | ETA: 72.9min
  [100/2164] Loss: 0.4768 | 0.47 batch/s | ETA: 72.5min
  [150/2164] Loss: 0.4812 | 0.47 batch/s | ETA: 71.0min
  [200/2164] Loss: 0.4845 | 0.47 batch/s | ETA: 69.6min
  [250/2164] Loss: 0.4876 | 0.47 batch/s | ETA: 68.2min
  [300/2164] Loss: 0.4974 | 0.46 batch/s | ETA: 67.5min
  [350/2164] Loss: 0.4922 | 0.45 batch/s | ETA: 67.1min
  [400/2164] Loss: 0.4972 | 0.44 batch/s | ETA: 66.1min
  [450/2164] Loss: 0.4986 | 0.44 batch/s | ETA: 64.8min
  [500/2164] Loss: 0.4983 | 0.44 batch/s | ETA: 62.6min
  [550/2164] Loss: 0.4955 | 0.44 batch/s | ETA: 61.3min
  [600/2164] Loss: 0.4967 | 0.44 batch/s | ETA: 59.6min
  [650/2164] Loss: 0.4960 | 0.45 batch/s | ETA: 56.2min
  [700/2164] Loss: 0.4972 | 0.46 batch/s | ETA: 53.0min
  [750/2164] Loss: 0.4986 | 0.47 batch/s | ETA: 50.0min
  [800/2164] Loss: 0.4969 | 0.48 batch/s | ETA: 47.2min
  [850/2164] Loss: 0.4999 | 0.49 batch/s | ETA: 44.6min
  [900/2164] Loss: 0.4984 | 0.50 batch/s | ETA: 4

  with autocast():



📊 Results:
  Train: Loss=0.5089, AUC=0.9337
  Val:   Loss=2.4142, AUC=0.8089
  Time: 72.4min | Total: 15.30hrs
  vs 128x128: +0.0065 (+0.65%)
  ETA: 26.42 hours

  ⏳ No improvement (7/7)

⚠️ Early stopping at epoch 11

✅ 224x224 TRAINING COMPLETE!
Best AUC: 0.8265
Time: 15.30 hours
Improvement over 128x128: +2.41%
