### Check RTX5090 running CUDA

In [None]:
import torch
import torchvision.models as models

# Report the PyTorch build and the CUDA hardware it can see.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
print(f"GPU count: {torch.cuda.device_count()}")

if torch.cuda.is_available():
    print(f"GPU name: {torch.cuda.get_device_name(0)}")
    print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

    # The forward-pass checks below need a GPU, so they only run on this
    # branch; without CUDA the cell now reports the problem instead of
    # crashing on the first .cuda() call.
    test_batch = torch.randn(2, 3, 224, 224).cuda()

    # Test ResNet50 specifically. Model construction sits inside the try so
    # that a download or device failure is reported like a forward failure.
    # weights="IMAGENET1K_V1" is what the deprecated pretrained=True selected.
    try:
        model = models.resnet50(weights="IMAGENET1K_V1").cuda()
        with torch.no_grad():  # smoke test only; no autograd graph needed
            output = model(test_batch)
        print("ResNet50 works!")
    except Exception as e:
        print(f"ResNet50 failed: {e}")

    # Test EfficientNet
    try:
        model_eff = models.efficientnet_b0(weights="IMAGENET1K_V1").cuda()
        with torch.no_grad():
            output_eff = model_eff(test_batch)
        print("EfficientNet works!")
    except Exception as e:
        print(f"EfficientNet failed: {e}")
else:
    print("❌ CUDA not detected by PyTorch")

### Process deer data

In [None]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.models as models
import numpy as np
import cv2
import random
import json
import os
import glob
import gc
from datetime import datetime
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split, StratifiedKFold
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# Size every loaded image is resized to. load_jawbone_data passes the reversed
# tuple to cv2.resize (which takes width, height), so this is (height, width).
IMAGE_SIZE = (672, 1344)
# Passed as target_per_class to every *training* OptimizedDataset, i.e. the
# number of samples drawn per class per epoch.
AUGMENTATION_TARGET = 1000
# n_splits for the StratifiedKFold used during architecture selection.
NUM_FOLDS = 5
# When True and CUDA is available, the trainer creates a GradScaler and runs
# training forward passes under torch.amp.autocast.
MIXED_PRECISION = True
# num_workers / pin_memory arguments given to every DataLoader in this file.
NUM_WORKERS = 0
PIN_MEMORY = True
# train_model divides each batch loss by this value and steps the optimizer
# once every this many batches.
GRADIENT_ACCUMULATION_STEPS = 2

# Candidate architectures compared by run_cv_on_training_data, keyed by name.
# Per entry:
#   model_fn      - torchvision constructor; create_model calls it with
#                   weights='DEFAULT'.
#   batch_size    - batch size for the train/val/test DataLoaders of that model.
#   freeze_layers - read by create_model. It controls how many leading children
#                   of model.features are frozen (models exposing both
#                   `features` and `classifier`) or whether layer1/layer2 are
#                   frozen (ResNets). The other create_model branches do not
#                   freeze anything, so the value has no effect there.
MODEL_CONFIGS = {
    'efficientnet_b0': {'model_fn': models.efficientnet_b0, 'batch_size': 48, 'freeze_layers': 3},
    'efficientnet_b1': {'model_fn': models.efficientnet_b1, 'batch_size': 36, 'freeze_layers': 3},
    'efficientnet_b2': {'model_fn': models.efficientnet_b2, 'batch_size': 30, 'freeze_layers': 3},
    'efficientnet_b3': {'model_fn': models.efficientnet_b3, 'batch_size': 24, 'freeze_layers': 3},
    'efficientnet_b4': {'model_fn': models.efficientnet_b4, 'batch_size': 18, 'freeze_layers': 3},
    'efficientnet_b5': {'model_fn': models.efficientnet_b5, 'batch_size': 12, 'freeze_layers': 3},
    'efficientnet_b6': {'model_fn': models.efficientnet_b6, 'batch_size': 9, 'freeze_layers': 3},
    'efficientnet_b7': {'model_fn': models.efficientnet_b7, 'batch_size': 6, 'freeze_layers': 3},
    'resnet18': {'model_fn': models.resnet18, 'batch_size': 72, 'freeze_layers': 2},
    'resnet34': {'model_fn': models.resnet34, 'batch_size': 60, 'freeze_layers': 2},
    'resnet50': {'model_fn': models.resnet50, 'batch_size': 48, 'freeze_layers': 2},
    'resnet101': {'model_fn': models.resnet101, 'batch_size': 30, 'freeze_layers': 2},
    'resnet152': {'model_fn': models.resnet152, 'batch_size': 24, 'freeze_layers': 2},
    'densenet121': {'model_fn': models.densenet121, 'batch_size': 36, 'freeze_layers': 2},
    'densenet169': {'model_fn': models.densenet169, 'batch_size': 30, 'freeze_layers': 2},
    'densenet201': {'model_fn': models.densenet201, 'batch_size': 24, 'freeze_layers': 2},
    'mobilenet_v2': {'model_fn': models.mobilenet_v2, 'batch_size': 60, 'freeze_layers': 3},
    'mobilenet_v3_small': {'model_fn': models.mobilenet_v3_small, 'batch_size': 72, 'freeze_layers': 3},
    'mobilenet_v3_large': {'model_fn': models.mobilenet_v3_large, 'batch_size': 60, 'freeze_layers': 3},
    'regnet_y_400mf': {'model_fn': models.regnet_y_400mf, 'batch_size': 48, 'freeze_layers': 2},
    'regnet_y_800mf': {'model_fn': models.regnet_y_800mf, 'batch_size': 36, 'freeze_layers': 2},
    'regnet_y_1_6gf': {'model_fn': models.regnet_y_1_6gf, 'batch_size': 30, 'freeze_layers': 2},
    'regnet_y_3_2gf': {'model_fn': models.regnet_y_3_2gf, 'batch_size': 24, 'freeze_layers': 2},
    'convnext_tiny': {'model_fn': models.convnext_tiny, 'batch_size': 36, 'freeze_layers': 2},
    'convnext_small': {'model_fn': models.convnext_small, 'batch_size': 30, 'freeze_layers': 2},
    'convnext_base': {'model_fn': models.convnext_base, 'batch_size': 24, 'freeze_layers': 2},
    'maxvit_t': {'model_fn': models.maxvit_t, 'batch_size': 18, 'freeze_layers': 2},
    'swin_t': {'model_fn': models.swin_t, 'batch_size': 24, 'freeze_layers': 2},
    'swin_s': {'model_fn': models.swin_s, 'batch_size': 18, 'freeze_layers': 2},
    'swin_b': {'model_fn': models.swin_b, 'batch_size': 12, 'freeze_layers': 2},
    'vit_b_16': {'model_fn': models.vit_b_16, 'batch_size': 24, 'freeze_layers': 6},
    'vit_b_32': {'model_fn': models.vit_b_32, 'batch_size': 36, 'freeze_layers': 6},
    'vit_l_16': {'model_fn': models.vit_l_16, 'batch_size': 12, 'freeze_layers': 8},
}

# Hyper-parameters shared by every architecture.
# NOTE(review): 'optimizer' and 'scheduler' are not read anywhere in this file;
# train_model always builds AdamW with CosineAnnealingLR. They appear to be
# descriptive only - confirm before relying on them.
TRAINING_CONFIG = {
    # Learning rates of the two AdamW parameter groups built in train_model.
    'backbone_lr': 0.0001,
    'classifier_lr': 0.0005,
    'optimizer': 'adamw',
    # AdamW weight_decay, applied to both parameter groups.
    'weight_decay': 0.05,
    'scheduler': 'cosine',
    # label_smoothing argument of the CrossEntropyLoss criterion.
    'label_smoothing': 0.1,
    # Dropout of the first layer of the replacement head; later head layers
    # use 0.5x and 0.25x this value.
    'dropout': 0.3,
    # Epoch budget; also used as T_max for the cosine schedule.
    'max_epochs': 80,
    # Epochs without a validation-accuracy improvement before early stopping.
    'patience': 25,
    # Strength preset handed to enhanced_augment_image for training data.
    'augmentation_strength': 'medium'
}

def load_jawbone_data():
    fpath = "D:\\Dropbox\\AI Projects\\buck\\jawbone\\images\\*.png"
    
    image_paths = glob.glob(fpath)
    if not image_paths:
        raise FileNotFoundError(f"No images found at {fpath}")
    
    images = []
    ages = []
    
    for img_path in image_paths:
        try:
            img = cv2.imread(img_path)
            if img is None:
                continue
            
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_resized = cv2.resize(img, IMAGE_SIZE[::-1])
            
            filename = os.path.basename(img_path)
            filename_no_ext = os.path.splitext(filename)[0]
            parts = filename_no_ext.split('_')
            
            if len(parts) < 3:
                continue
            
            bbb_part = parts[1]
            
            if 'p' not in bbb_part:
                continue
            
            value_str = bbb_part.replace('p', '.')
            try:
                age_value = float(value_str)
            except ValueError:
                continue
            
            images.append(img_resized)
            ages.append(age_value)
            
        except Exception:
            continue
    
    if not images:
        raise ValueError("No valid images loaded")
    
    ages_grouped = [5.5 if age >= 5.5 else age for age in ages]
    
    age_counts = Counter(ages_grouped)
    valid_ages = {age for age, count in age_counts.items() if count >= 3}
    
    filtered_images = []
    filtered_ages = []
    
    for img, age in zip(images, ages_grouped):
        if age in valid_ages:
            filtered_images.append(img)
            filtered_ages.append(age)
    
    print(f"Total images: {len(filtered_images)}")
    print(f"Age distribution: {dict(Counter(filtered_ages))}")
    
    return np.array(filtered_images, dtype=np.uint8), filtered_ages

def enhanced_augment_image(image, strength='medium'):
    """Return a randomly augmented uint8 copy of *image*.

    Up to five independent steps are applied, each gated by its own
    probability: rotation about the image centre, horizontal flip,
    contrast/brightness scaling, gamma correction and additive Gaussian
    noise. ``strength`` selects the 'light' or 'medium' preset; any other
    value selects the strongest preset. Non-uint8 input is cast to uint8
    first. If no step fires, the (possibly cast) input is returned as is.
    """
    # Preset layout: (p_rotate, p_flip, p_brightness, p_gamma, p_noise,
    #                 max rotation in degrees, (min gain, max gain))
    presets = {
        'light': (0.5, 0.3, 0.6, 0.2, 0.1, 8, (0.85, 1.15)),
        'medium': (0.7, 0.5, 0.8, 0.4, 0.3, 12, (0.75, 1.25)),
    }
    strongest = (0.8, 0.6, 0.9, 0.5, 0.4, 18, (0.65, 1.35))
    (p_rotate, p_flip, p_brightness, p_gamma, p_noise,
     max_angle, gain_bounds) = presets.get(strength, strongest)

    if image.dtype != np.uint8:
        image = image.astype(np.uint8)

    # Rotation about the centre, keeping the original canvas size.
    if random.random() < p_rotate:
        degrees = random.uniform(-max_angle, max_angle)
        height, width = image.shape[:2]
        rotation = cv2.getRotationMatrix2D((width // 2, height // 2), degrees, 1.0)
        image = cv2.warpAffine(image, rotation, (width, height))

    # Horizontal mirror.
    if random.random() < p_flip:
        image = cv2.flip(image, 1)

    # Linear gain (alpha) and offset (beta), saturated back to uint8.
    if random.random() < p_brightness:
        low_gain, high_gain = gain_bounds
        gain = random.uniform(low_gain, high_gain)
        offset = random.randint(-20, 20)
        image = cv2.convertScaleAbs(image, alpha=gain, beta=offset)

    # Gamma correction through a 256-entry lookup table.
    if random.random() < p_gamma:
        exponent = 1.0 / random.uniform(0.85, 1.15)
        lut = np.array(
            [((level / 255.0) ** exponent) * 255 for level in np.arange(0, 256)]
        ).astype("uint8")
        image = cv2.LUT(image, lut)

    # Gaussian noise, added in int16 so the sum cannot wrap before clipping.
    if random.random() < p_noise:
        jitter = np.random.normal(0, 5, image.shape).astype(np.int16)
        image = np.clip(image.astype(np.int16) + jitter, 0, 255).astype(np.uint8)

    return image

class OptimizedDataset(Dataset):
    """Class-balanced view over a small in-memory image set.

    The dataset always reports ``num_classes * target_per_class`` items.
    Index ``i`` belongs to class ``i // target_per_class``; within a class
    the base images are cycled through in order. In training mode, every item
    past the first pass over a class's base images is augmented on the fly
    with ``enhanced_augment_image``.

    NOTE(review): in non-training mode each item is mirrored horizontally
    with probability 0.5, so validation/test batches are not deterministic.
    This looks intentional (it makes the repeated copies differ), but confirm.
    """

    def __init__(self, base_images, labels, aug_strength='medium', target_per_class=1000, training=True):
        self.base_images = base_images
        self.labels = np.array(labels)
        self.aug_strength = aug_strength
        self.training = training
        self.target_per_class = target_per_class

        unique_classes = np.unique(labels)
        # class label -> positions of that class inside base_images
        self.class_to_indices = {
            cls: np.where(self.labels == cls)[0] for cls in unique_classes
        }

        self.num_classes = len(unique_classes)
        self.class_list = sorted(unique_classes)
        self.length = self.num_classes * self.target_per_class

        # Per-channel normalisation constants, shaped to broadcast over CHW.
        self.mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(3, 1, 1)
        self.std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(3, 1, 1)

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        class_position, slot = divmod(idx, self.target_per_class)
        label = self.class_list[class_position]
        pool = self.class_to_indices[label]

        # Cycle through the class's base images; copy so augmentation can
        # never modify the stored array.
        picture = self.base_images[pool[slot % len(pool)]].copy()

        # The first len(pool) slots return the untouched originals.
        if self.training and slot >= len(pool):
            picture = enhanced_augment_image(picture, self.aug_strength)

        picture = picture.astype(np.float32) / 255.0
        if picture.ndim == 3:
            picture = picture.transpose(2, 0, 1)  # HWC -> CHW

        # Evaluation-mode random mirror along the last axis.
        if not self.training and random.random() < 0.5:
            picture = np.flip(picture, axis=2).copy()

        picture = (picture - self.mean) / self.std

        return torch.from_numpy(picture.astype(np.float32)), label

class AcademicModelTrainer:
    """Pick an architecture by cross-validation, retrain it and test it once.

    Attributes:
        num_classes: output width of every replacement classification head.
        device: CUDA when available, otherwise CPU.
        save_dir: directory (created on construction) that receives
            ``cv_results.json`` and the final checkpoint.
        scaler: ``torch.amp.GradScaler`` when CUDA is available and
            MIXED_PRECISION is enabled, otherwise None.
        cv_results: one summary dict per architecture that finished at least
            one fold.
        best_cv_model_info / best_cv_score: best architecture seen so far by
            mean cross-validation accuracy.
    """

    # Top-level attribute names under which torchvision classification models
    # keep their head. Used both to find the head to replace and to decide
    # which parameters get the classifier learning rate.
    _HEAD_ATTRS = ('fc', 'classifier', 'head', 'heads')

    def __init__(self, num_classes, save_dir=None):
        """Create the output directory and configure the compute device.

        Args:
            num_classes: number of target classes.
            save_dir: output directory; defaults to a timestamped
                ``academic_jawbone_<YYYYmmdd_HHMMSS>`` folder.
        """
        self.num_classes = num_classes
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # Always defined, so train_model can test it on CPU-only hosts too.
        self.scaler = None

        if save_dir is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            self.save_dir = f"academic_jawbone_{timestamp}"
        else:
            self.save_dir = save_dir

        os.makedirs(self.save_dir, exist_ok=True)
        self.cv_results = []
        self.best_cv_model_info = None
        self.best_cv_score = 0.0

        print(f"Using device: {self.device}")
        if torch.cuda.is_available():
            print(f"GPU: {torch.cuda.get_device_name()}")
            print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.allow_tf32 = True
            torch.backends.cuda.matmul.allow_tf32 = True

            if MIXED_PRECISION:
                self.scaler = torch.amp.GradScaler('cuda')

    @staticmethod
    def _freeze(modules):
        """Disable gradients for every parameter of the given modules."""
        for module in modules:
            for param in module.parameters():
                param.requires_grad = False

    def _head_layers(self, in_features):
        """Return the layers of the 512 -> 256 -> num_classes replacement head."""
        dropout = TRAINING_CONFIG['dropout']
        return [
            nn.Dropout(dropout),
            nn.Linear(in_features, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout * 0.5),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout * 0.25),
            nn.Linear(256, self.num_classes),
        ]

    def _replace_head(self, model):
        """Swap the model's classification head for a fresh one.

        Returns True when a head was found and replaced, False otherwise.
        """
        for attr in self._HEAD_ATTRS:
            head = getattr(model, attr, None)
            if not isinstance(head, nn.Module):
                continue

            # Plain linear head (ResNet/RegNet `fc`, DenseNet `classifier`,
            # Swin `head`).
            if isinstance(head, nn.Linear):
                setattr(model, attr, nn.Sequential(*self._head_layers(head.in_features)))
                return True

            # ViT keeps its linear layer at `heads.head`; replacing only that
            # child keeps the parameter names unchanged.
            inner = getattr(head, 'head', None) if attr == 'heads' else None
            if isinstance(inner, nn.Linear):
                head.head = nn.Sequential(*self._head_layers(inner.in_features))
                return True

            if isinstance(head, nn.Sequential):
                layers = list(head.children())
                first_linear = next(
                    (i for i, layer in enumerate(layers) if isinstance(layer, nn.Linear)),
                    None,
                )
                if first_linear is None:
                    continue
                # The head's input width is that of its FIRST linear layer
                # (MobileNetV3 has a hidden linear layer, so the last one is
                # the wrong size). Layers in front of it, such as ConvNeXt's
                # norm + Flatten or MaxViT's pool + Flatten + norm, are needed
                # to produce that input and are kept. A leading Dropout is
                # dropped because the new head brings its own.
                kept = [
                    layer for layer in layers[:first_linear]
                    if not isinstance(layer, nn.Dropout)
                ]
                new_layers = self._head_layers(layers[first_linear].in_features)
                setattr(model, attr, nn.Sequential(*kept, *new_layers))
                return True

        return False

    def create_model(self, model_name, model_config):
        """Build a pretrained model with frozen early layers and a new head.

        Args:
            model_name: key from MODEL_CONFIGS; used to recognise ResNets.
            model_config: dict with 'model_fn' (called with
                ``weights='DEFAULT'``) and optionally 'freeze_layers'
                (default 2).

        Returns:
            The model, moved to ``self.device``.

        Raises:
            ValueError: no classification head could be located, so the model
                would otherwise keep its original output width.

        Freezing only applies to models that expose both ``features`` and
        ``classifier`` (first ``freeze_layers`` children of ``features``) and
        to ResNets (stem, plus ``layer1``/``layer2`` for values >= 1 / >= 2).
        For every other family 'freeze_layers' is currently ignored.
        """
        model = model_config['model_fn'](weights='DEFAULT')
        freeze_layers = model_config.get('freeze_layers', 2)

        if hasattr(model, 'features') and hasattr(model, 'classifier'):
            self._freeze(list(model.features.children())[:freeze_layers])
        elif 'resnet' in model_name:
            frozen_modules = [model.conv1, model.bn1]
            if freeze_layers >= 1:
                frozen_modules.append(model.layer1)
            if freeze_layers >= 2:
                frozen_modules.append(model.layer2)
            self._freeze(frozen_modules)

        if not self._replace_head(model):
            raise ValueError(f"Could not locate a classification head on {model_name}")

        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        frozen = sum(p.numel() for p in model.parameters() if not p.requires_grad)
        print(f"  Parameters: {trainable:,} trainable, {frozen:,} frozen")

        return model.to(self.device)

    def train_model(self, model, model_name, train_loader, val_loader, fold_num):
        """Train with early stopping on validation accuracy.

        Args:
            model: network already on ``self.device``.
            model_name: unused here; kept for call compatibility.
            train_loader: sized iterable of ``(images, labels)`` batches.
            val_loader: iterable of ``(images, labels)`` batches.
            fold_num: unused here; kept for call compatibility.

        Returns:
            ``(best_state, best_val_acc)``: an independent copy of the state
            dict from the best validation epoch and that accuracy in percent.
            ``best_state`` is None only if no epoch ran.
        """
        backbone_params = []
        classifier_params = []

        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue
            # Decide by the top-level module name. A substring test such as
            # `'fc' in name` also matches squeeze-excitation layers
            # (`...fc1.weight`) deep inside the backbone.
            if name.split('.', 1)[0] in self._HEAD_ATTRS:
                classifier_params.append(param)
            else:
                backbone_params.append(param)

        param_groups = [
            group for group in (
                {'params': backbone_params, 'lr': TRAINING_CONFIG['backbone_lr']},
                {'params': classifier_params, 'lr': TRAINING_CONFIG['classifier_lr']},
            ) if group['params']
        ]
        optimizer = optim.AdamW(param_groups, weight_decay=TRAINING_CONFIG['weight_decay'])

        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=TRAINING_CONFIG['max_epochs'])
        criterion = nn.CrossEntropyLoss(label_smoothing=TRAINING_CONFIG['label_smoothing'])

        use_amp = MIXED_PRECISION and self.scaler is not None
        num_batches = len(train_loader)

        best_val_acc = 0
        best_state = None
        patience_counter = 0

        for epoch in range(TRAINING_CONFIG['max_epochs']):
            model.train()
            train_correct = 0
            train_total = 0
            optimizer.zero_grad()

            for batch_idx, (images, labels) in enumerate(train_loader):
                images, labels = images.to(self.device), labels.to(self.device)

                if use_amp:
                    with torch.amp.autocast('cuda'):
                        outputs = model(images)
                        loss = criterion(outputs, labels) / GRADIENT_ACCUMULATION_STEPS
                    self.scaler.scale(loss).backward()
                else:
                    outputs = model(images)
                    loss = criterion(outputs, labels) / GRADIENT_ACCUMULATION_STEPS
                    loss.backward()

                # Also step on the final batch. Otherwise a trailing partial
                # accumulation group is never applied and its gradients leak
                # into the next epoch.
                is_last_batch = batch_idx + 1 == num_batches
                if (batch_idx + 1) % GRADIENT_ACCUMULATION_STEPS == 0 or is_last_batch:
                    if use_amp:
                        self.scaler.step(optimizer)
                        self.scaler.update()
                    else:
                        optimizer.step()
                    optimizer.zero_grad()

                _, predicted = outputs.max(1)
                train_total += labels.size(0)
                train_correct += predicted.eq(labels).sum().item()

            train_acc = 100. * train_correct / max(train_total, 1)

            model.eval()
            val_correct = 0
            val_total = 0

            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(self.device), labels.to(self.device)
                    outputs = model(images)
                    _, predicted = outputs.max(1)
                    val_total += labels.size(0)
                    val_correct += predicted.eq(labels).sum().item()

            val_acc = 100. * val_correct / max(val_total, 1)

            if best_state is None or val_acc > best_val_acc:
                best_val_acc = val_acc
                # state_dict() hands out references to the live tensors and
                # dict.copy() is shallow, so clone each tensor to get a real
                # snapshot of this epoch's weights.
                best_state = {
                    key: value.detach().clone()
                    for key, value in model.state_dict().items()
                }
                patience_counter = 0
            else:
                patience_counter += 1

            if epoch % 20 == 0:
                print(f"    Epoch {epoch}: Train {train_acc:.1f}%, Val {val_acc:.1f}%")

            if patience_counter >= TRAINING_CONFIG['patience']:
                print(f"    Early stopping at epoch {epoch}")
                break

            scheduler.step()

        return best_state, best_val_acc

    def run_cv_on_training_data(self, X_train, y_train, label_mapping):
        """Run stratified k-fold CV for every architecture in MODEL_CONFIGS.

        Args:
            X_train: array of training images, indexable by fold indices.
            y_train: array of integer class labels aligned with X_train.
            label_mapping: unused here; kept for call compatibility.

        Returns:
            Info dict of the architecture with the highest mean fold accuracy,
            or None if none completed a fold. A fold that raises is reported
            and skipped; results are written to ``cv_results.json``.
        """
        print(f"\nCROSS-VALIDATION ON TRAINING DATA")
        print(f"{NUM_FOLDS}-fold CV across {len(MODEL_CONFIGS)} architectures")
        print(f"Total CV experiments: {len(MODEL_CONFIGS) * NUM_FOLDS}")

        skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=42)

        for model_name, model_config in MODEL_CONFIGS.items():
            print(f"\n{'='*60}")
            print(f"TESTING: {model_name.upper()}")
            print(f"{'='*60}")

            fold_scores = []

            for fold_idx, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train), 1):
                print(f"\nFold {fold_idx}/{NUM_FOLDS}")

                try:
                    train_dataset = OptimizedDataset(
                        X_train[train_idx], y_train[train_idx],
                        TRAINING_CONFIG['augmentation_strength'], AUGMENTATION_TARGET, True)
                    val_dataset = OptimizedDataset(
                        X_train[val_idx], y_train[val_idx], 'light', 200, False)

                    batch_size = model_config['batch_size']
                    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                              num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
                    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                                            num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)

                    model = self.create_model(model_name, model_config)
                    # Only the score is needed; not binding the weight
                    # snapshot lets it be freed immediately.
                    val_acc = self.train_model(model, model_name, train_loader, val_loader, fold_idx)[1]

                    fold_scores.append(val_acc)
                    print(f"  Fold {fold_idx} Val Acc: {val_acc:.1f}%")

                except Exception as e:
                    # Deliberate best-effort: one failing architecture or fold
                    # must not abort the whole sweep.
                    print(f"  FAILED: {str(e)}")

                finally:
                    # Drop references on success AND failure so GPU memory is
                    # released before the next fold.
                    model = train_dataset = val_dataset = None
                    train_loader = val_loader = None
                    gc.collect()
                    torch.cuda.empty_cache()

            if fold_scores:
                # Plain floats keep the results JSON-serialisable.
                mean_cv_score = float(np.mean(fold_scores))
                std_cv_score = float(np.std(fold_scores))

                self.cv_results.append({
                    'model_name': model_name,
                    'cv_scores': fold_scores,
                    'mean_cv': mean_cv_score,
                    'std_cv': std_cv_score
                })

                print(f"\n{model_name} CV Summary: {mean_cv_score:.1f}% ± {std_cv_score:.1f}%")

                if mean_cv_score > self.best_cv_score:
                    self.best_cv_score = mean_cv_score
                    self.best_cv_model_info = {
                        'model_name': model_name,
                        'model_config': model_config,
                        'mean_cv': mean_cv_score,
                        'std_cv': std_cv_score
                    }

        self.save_cv_results()
        return self.best_cv_model_info

    def train_final_model_on_all_training_data(self, X_train, y_train, model_info):
        """Retrain the selected architecture and return it with its best weights.

        Despite the name, a stratified 20% of ``X_train`` is held out as the
        validation split that drives early stopping; the model is fitted on
        the remaining 80%.

        Args:
            X_train, y_train: training images and integer labels.
            model_info: dict as returned by run_cv_on_training_data.

        Returns:
            The trained model loaded with its best-validation weights.
        """
        print(f"\n{'='*80}")
        print(f"TRAINING FINAL MODEL ON ALL TRAINING DATA")
        print(f"Best model from CV: {model_info['model_name']}")
        print(f"CV Score: {model_info['mean_cv']:.1f}% ± {model_info['std_cv']:.1f}%")
        print(f"{'='*80}")

        model_name = model_info['model_name']
        model_config = model_info['model_config']

        X_train_final, X_val_final, y_train_final, y_val_final = train_test_split(
            X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
        )

        train_dataset = OptimizedDataset(X_train_final, y_train_final,
                                         TRAINING_CONFIG['augmentation_strength'], AUGMENTATION_TARGET, True)
        val_dataset = OptimizedDataset(X_val_final, y_val_final, 'light', 200, False)

        batch_size = model_config['batch_size']
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                  num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                                num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)

        model = self.create_model(model_name, model_config)
        final_state, final_val_acc = self.train_model(model, model_name, train_loader, val_loader, 0)

        # final_state is None only when no epoch ran; keep the current weights
        # in that case.
        if final_state is not None:
            model.load_state_dict(final_state)

        print(f"\nFinal model training complete: {final_val_acc:.1f}%")

        return model

    def evaluate_on_held_out_test_set(self, model, X_test, y_test, model_name, model_config):
        """Score the model on the held-out data and save a checkpoint.

        The test images are wrapped in a non-training OptimizedDataset with
        200 samples per class, so the printed "Test Set Size" is the number of
        samples drawn from that dataset, not the number of distinct images.

        Args:
            model: trained network on ``self.device``.
            X_test, y_test: held-out images and integer labels.
            model_name: used in the checkpoint file name and metadata.
            model_config: supplies the evaluation 'batch_size'.

        Returns:
            Test accuracy in percent.
        """
        print(f"\n{'='*80}")
        print(f"HELD-OUT TEST SET EVALUATION")
        print(f"{'='*80}")

        test_dataset = OptimizedDataset(X_test, y_test, 'light', 200, False)
        test_loader = DataLoader(test_dataset, batch_size=model_config['batch_size'], shuffle=False,
                                 num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)

        model.eval()
        test_correct = 0
        test_total = 0

        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(self.device), labels.to(self.device)
                outputs = model(images)
                _, predicted = outputs.max(1)
                test_total += labels.size(0)
                test_correct += predicted.eq(labels).sum().item()

        test_acc = 100. * test_correct / max(test_total, 1)

        print(f"\nTest Set Size: {test_total} images")
        print(f"Test Accuracy: {test_acc:.1f}%")

        # CV statistics are absent if this is called without a prior CV run.
        cv_info = self.best_cv_model_info or {}

        save_path = os.path.join(self.save_dir, f"{model_name}_final_test{test_acc:.1f}.pth")
        torch.save({
            'model_state_dict': model.state_dict(),
            'model_name': model_name,
            'cv_mean': cv_info.get('mean_cv'),
            'cv_std': cv_info.get('std_cv'),
            'test_accuracy': test_acc,
            'image_size': IMAGE_SIZE,
            'training_config': TRAINING_CONFIG
        }, save_path)

        print(f"\nFinal model saved: {save_path}")

        return test_acc

    def save_cv_results(self):
        """Write ``self.cv_results`` to ``<save_dir>/cv_results.json``."""
        results_path = os.path.join(self.save_dir, "cv_results.json")
        with open(results_path, 'w') as f:
            json.dump(self.cv_results, f, indent=2)

    def print_cv_summary(self):
        """Print the ten best architectures by mean CV accuracy."""
        print(f"\n{'='*80}")
        print("CROSS-VALIDATION SUMMARY")
        print(f"{'='*80}")

        sorted_results = sorted(self.cv_results, key=lambda x: x['mean_cv'], reverse=True)

        for result in sorted_results[:10]:
            print(f"{result['model_name']:25} | CV: {result['mean_cv']:5.1f}% ± {result['std_cv']:4.1f}%")

def main():
    """Run the full pipeline: load data, split, cross-validate, retrain, test.

    Prints progress and the final summary; returns early (after the CV
    summary) when no architecture completed cross-validation.
    """
    # The trainer falls back to CPU, so only touch the CUDA runtime when a
    # device exists (reset_peak_memory_stats/synchronize raise otherwise).
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.synchronize()

    print("ACADEMICALLY RIGOROUS JAWBONE MODEL EVALUATION")
    print("="*80)
    print("Methodology:")
    print("1. Split data: 80% training, 20% held-out test")
    print("2. Test set is NEVER used until final evaluation")
    print("3. Cross-validation on training data to select best architecture")
    print("4. Train final model on all training data")
    print("5. Evaluate once on held-out test set")
    print("="*80)

    start_time = time.time()

    images, ages = load_jawbone_data()

    # Map each distinct age to a contiguous class index, in ascending order.
    unique_ages = sorted(set(ages))
    label_mapping = {age: i for i, age in enumerate(unique_ages)}
    y_indices = np.array([label_mapping[age] for age in ages])

    print(f"\nACADEMIC TRAIN/TEST SPLIT")
    X_train, X_test, y_train, y_test = train_test_split(
        images, y_indices, test_size=0.2, random_state=42, stratify=y_indices
    )
    print(f"Training: {len(X_train)} images")
    print(f"Test (held-out): {len(X_test)} images")
    print("Test set will NOT be touched until final evaluation")

    trainer = AcademicModelTrainer(num_classes=len(unique_ages))

    best_model_info = trainer.run_cv_on_training_data(X_train, y_train, label_mapping)

    trainer.print_cv_summary()

    if best_model_info is None:
        print("\nNo models successfully completed CV")
        return

    final_model = trainer.train_final_model_on_all_training_data(X_train, y_train, best_model_info)

    test_accuracy = trainer.evaluate_on_held_out_test_set(
        final_model, X_test, y_test,
        best_model_info['model_name'],
        best_model_info['model_config']
    )

    elapsed = (time.time() - start_time) / 60

    print(f"\n{'='*80}")
    print("FINAL RESULTS")
    print(f"{'='*80}")
    print(f"Best Architecture: {best_model_info['model_name']}")
    print(f"Cross-Validation: {best_model_info['mean_cv']:.1f}% ± {best_model_info['std_cv']:.1f}%")
    print(f"Held-Out Test: {test_accuracy:.1f}%")
    print(f"Total Time: {elapsed:.1f} minutes")
    print(f"Results saved to: {trainer.save_dir}")

if __name__ == "__main__":
    main()

ERROR! Session/line number was not unique in database. History logging moved to new session 75
ACADEMICALLY RIGOROUS JAWBONE MODEL EVALUATION
Methodology:
1. Split data: 80% training, 20% held-out test
2. Test set is NEVER used until final evaluation
3. Cross-validation on training data to select best architecture
4. Train final model on all training data
5. Evaluate once on held-out test set
Total images: 243
Age distribution: {0.5: 39, 2.5: 33, 3.5: 29, 1.5: 62, 4.5: 20, 5.5: 60}

ACADEMIC TRAIN/TEST SPLIT
Training: 194 images
Test (held-out): 49 images
Test set will NOT be touched until final evaluation
Using device: cuda
GPU: NVIDIA GeForce RTX 5090
GPU Memory: 34.2 GB

CROSS-VALIDATION ON TRAINING DATA
5-fold CV across 33 architectures
Total CV experiments: 165

TESTING: EFFICIENTNET_B0

Fold 1/5
  Parameters: 4,777,200 trainable, 19,090 frozen
    Epoch 0: Train 73.4%, Val 76.8%
    Epoch 20: Train 100.0%, Val 77.8%
    Epoch 40: Train 100.0%, Val 78.3%
    Early stopping at epoc