## What changed?

In notebook `teeth - 3`, the basic ensemble prototype and the method for building it were established. However, ALL of the data were used during training, which is not useful to the academic community because no reliably separate test data remained for evaluating the model. In this notebook, the data are properly split into training and held-out test sets, loss curves are recorded, and all splits are kept as well.

In [1]:
# Environment check: report the PyTorch build and whether a CUDA GPU is usable.

import torch

has_cuda = torch.cuda.is_available()

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {has_cuda}")
print(f"CUDA version: {torch.version.cuda}")
print(f"GPU count: {torch.cuda.device_count()}")

if not has_cuda:
    print("❌ CUDA not detected by PyTorch")
else:
    # Device 0 is the only one inspected here.
    device_props = torch.cuda.get_device_properties(0)
    print(f"GPU name: {torch.cuda.get_device_name(0)}")
    print(f"GPU memory: {device_props.total_memory / 1024**3:.1f} GB")

PyTorch version: 2.5.1+cu121
CUDA available: True
CUDA version: 12.1
GPU count: 1
GPU name: NVIDIA GeForce RTX 2060
GPU memory: 6.0 GB


In [None]:
# Academically Rigorous Multi-Architecture Ensemble Training Script

import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import timm
import numpy as np
import cv2
import random
import json
import os
import glob
import pickle
from datetime import datetime
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# timm model names tried on every fold; the one with the best validation
# accuracy is kept for that fold.
AVAILABLE_ARCHITECTURES = ['resnet18', 'resnet34', 'resnet50', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'mobilenetv3_large_100']
# Per-class sample count each training fold is padded to (replicated originals
# plus randomly augmented copies).
AUGMENTATION_TARGET = 1000
# Number of stratified cross-validation folds; one ensemble member per fold.
NUM_FOLDS = 5
# (height, width) of the network input. It is reversed to (width, height)
# where cv2.resize is called and printed as "widthxheight".
IMAGE_SIZE = (224, 448)

# Global plotting defaults for the training-curve figures.
plt.style.use('default')
sns.set_palette("husl")

# Mixed-precision helpers. If the import fails, a no-op `autocast` context
# manager is defined so `with autocast():` blocks still run; GradScaler is
# then undefined, so callers must check MIXED_PRECISION_AVAILABLE first.
try:
    from torch.cuda.amp import autocast, GradScaler
    MIXED_PRECISION_AVAILABLE = True
except ImportError:
    MIXED_PRECISION_AVAILABLE = False
    class autocast:
        # Stand-in context manager that does nothing on enter or exit.
        def __enter__(self):
            return self
        def __exit__(self, *args):
            pass

def _parse_age_from_filename(img_path):
    """Return the age encoded in an image filename, or None if it does not match.

    The filename stem is split on ``_``; it must have at least three parts and
    the second part must contain ``p``, which stands in for the decimal point
    (e.g. ``2p5`` -> 2.5).
    """
    stem = os.path.splitext(os.path.basename(img_path))[0]
    parts = stem.split('_')
    if len(parts) < 3:
        return None

    age_token = parts[1]
    if 'p' not in age_token:
        return None

    try:
        return float(age_token.replace('p', '.'))
    except ValueError:
        return None


def load_original_data(image_glob=None):
    """Load, resize and label every image matched by a glob pattern.

    Args:
        image_glob: Glob pattern of the PNG files to load. Defaults to the
            original hard-coded dataset location when omitted.

    Returns:
        ``(images, ages)`` where ``images`` is a numpy array of RGB images
        resized to IMAGE_SIZE and ``ages`` is a list of float age labels.
        Ages >= 5.5 are merged into a single 5.5 group, and age groups with
        fewer than 3 images are dropped.

    Raises:
        FileNotFoundError: If the pattern matches no files.
        ValueError: If no file could be decoded and labelled.
    """
    try:
        fpath = image_glob if image_glob is not None else "G:\\Dropbox\\AI Projects\\buck\\tooth_analysis\\images\\*.png"

        # Sort so sample indices (and therefore the saved train/test and fold
        # splits) do not depend on the filesystem's listing order.
        image_paths = sorted(glob.glob(fpath))
        if not image_paths:
            raise FileNotFoundError(f"No images found at {fpath}")

        images = []
        ages = []
        skipped = 0

        for img_path in image_paths:
            # Validate the filename first so badly named files are never decoded.
            age_value = _parse_age_from_filename(img_path)
            if age_value is None:
                skipped += 1
                continue

            try:
                img = cv2.imread(img_path)
                if img is None:
                    skipped += 1
                    continue

                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                # cv2.resize expects (width, height); IMAGE_SIZE is (height, width).
                img_resized = cv2.resize(img, IMAGE_SIZE[::-1])
            except Exception as e:
                # Best effort: one corrupt file must not abort the load, but
                # the reason is reported instead of being swallowed.
                print(f"Skipping {img_path}: {e}")
                skipped += 1
                continue

            images.append(img_resized)
            ages.append(age_value)

        if skipped:
            print(f"Skipped {skipped} of {len(image_paths)} files (unreadable or unexpected filename)")

        if not images:
            raise ValueError("No valid images loaded")

        # Merge every age of 5.5 and above into one class.
        ages_grouped = [5.5 if age >= 5.5 else age for age in ages]

        # Keep only age groups that have at least 3 images.
        age_counts = Counter(ages_grouped)
        valid_ages = {age for age, count in age_counts.items() if count >= 3}

        filtered_images = []
        filtered_ages = []
        for img, age in zip(images, ages_grouped):
            if age in valid_ages:
                filtered_images.append(img)
                filtered_ages.append(age)

        return np.array(filtered_images), filtered_ages

    except Exception as e:
        print(f"Error loading data: {e}")
        raise

def enhanced_augment_image(image):
    """Return a randomly perturbed uint8 copy of ``image``.

    Each step fires independently with its own probability: rotation (0.7),
    horizontal flip (0.5), brightness/contrast scaling (0.8), gamma
    correction (0.4) and additive Gaussian noise (0.3).
    """
    out = image if image.dtype == np.uint8 else image.astype(np.uint8)

    # Rotate by up to +/-15 degrees about the image centre.
    if random.random() < 0.7:
        theta = random.uniform(-15, 15)
        height, width = out.shape[:2]
        rotation = cv2.getRotationMatrix2D((width // 2, height // 2), theta, 1.0)
        out = cv2.warpAffine(out, rotation, (width, height))

    # Mirror left-right.
    if random.random() < 0.5:
        out = cv2.flip(out, 1)

    # Linear contrast (gain) and brightness (bias) change.
    if random.random() < 0.8:
        gain = random.uniform(0.7, 1.3)
        bias = random.randint(-25, 25)
        out = cv2.convertScaleAbs(out, alpha=gain, beta=bias)

    # Gamma correction through a 256-entry lookup table.
    if random.random() < 0.4:
        exponent = 1.0 / random.uniform(0.8, 1.2)
        lookup = np.array(
            [((level / 255.0) ** exponent) * 255 for level in np.arange(0, 256)]
        ).astype("uint8")
        out = cv2.LUT(out, lookup)

    # Gaussian pixel noise, added in int16 so it can be clipped safely.
    if random.random() < 0.3:
        jitter = np.random.normal(0, 7, out.shape).astype(np.int16)
        out = np.clip(out.astype(np.int16) + jitter, 0, 255).astype(np.uint8)

    return out

def create_augmented_data(X_train, y_train):
    """Build a class-balanced training set from replicated and augmented images.

    Every class contributes 4 unmodified copies of each of its images, then is
    padded with randomly augmented images until it reaches the per-class
    target.

    Args:
        X_train: Array-like of training images, indexed along the first axis.
        y_train: Array-like of integer class labels, one per image. Labels do
            not need to be contiguous.

    Returns:
        ``(X_aug, y_aug)`` numpy arrays of images and labels.

    Raises:
        ValueError: If no training samples are given.
    """
    X_train = np.asarray(X_train)
    # A plain list would turn `y_train == class_idx` into a scalar comparison.
    y_train = np.asarray(y_train)
    if len(y_train) == 0:
        raise ValueError("create_augmented_data received no training samples")

    class_counts = Counter(y_train.tolist())
    # Each class already contributes 4x its size, so the target must be at
    # least 4x the largest class or that class would end up over-represented.
    target_count = max(AUGMENTATION_TARGET, 4 * max(class_counts.values()))

    X_aug = []
    y_aug = []

    # Iterate over the labels actually present: a fold that lacks a middle
    # class must not cause the highest label to be skipped.
    for class_idx in sorted(class_counts):
        class_images = X_train[y_train == class_idx]
        current_count = len(class_images)

        for _ in range(4):
            X_aug.extend(class_images)
            y_aug.extend([class_idx] * current_count)

        needed = target_count - (current_count * 4)
        for _ in range(needed):
            orig_idx = random.randint(0, current_count - 1)
            aug_img = enhanced_augment_image(class_images[orig_idx].copy())
            X_aug.append(aug_img)
            y_aug.append(class_idx)

    return np.array(X_aug), np.array(y_aug)

class OptimizedDataset(Dataset):
    """In-memory image dataset that normalises images on access.

    Images are stored in their source dtype (uint8 for the images loaded in
    this notebook) and converted to float32 one item at a time. Converting the
    whole array up front would quadruple the memory of a uint8 training set.

    Args:
        X: Images as a numpy array or sequence, channels-last or channels-first.
        y: Integer class labels, one per image.
        test_time_aug: When True, each fetched image is horizontally flipped
            with probability 0.5.
    """

    def __init__(self, X, y, test_time_aug=False):
        # as_tensor keeps the numpy dtype and avoids an eager float32 copy.
        self.X = torch.as_tensor(X if isinstance(X, np.ndarray) else np.array(X))
        self.y = torch.as_tensor(y if isinstance(y, np.ndarray) else np.array(y), dtype=torch.long)
        self.test_time_aug = test_time_aug
        # Per-channel normalisation constants, shaped to broadcast over (C, H, W).
        self.mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
        self.std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(image, label)``; image is a normalised float32 (C, H, W) tensor."""
        # clone() detaches the item from the shared backing array.
        image = self.X[idx].clone().float()
        label = self.y[idx].clone()

        # Heuristic: values above 1.0 mean the image is on a 0-255 scale.
        if image.max() > 1.0:
            image = image / 255.0

        # Channels-last (H, W, 3) -> channels-first (3, H, W).
        if len(image.shape) == 3 and image.shape[-1] == 3:
            image = image.permute(2, 0, 1)

        if tuple(image.shape[-2:]) != tuple(IMAGE_SIZE):
            image = F.interpolate(image.unsqueeze(0), size=IMAGE_SIZE, mode='bilinear', align_corners=False).squeeze(0)

        if self.test_time_aug and random.random() < 0.5:
            image = torch.flip(image, [2])

        image = (image - self.mean) / self.std
        return image, label

class MultiArchEnsembleTrainer:
    def __init__(self, num_classes, save_dir=None):
        self.num_classes = num_classes
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        if save_dir is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            self.save_dir = f"academic_ensemble_{timestamp}"
        else:
            self.save_dir = save_dir
        
        os.makedirs(self.save_dir, exist_ok=True)
        print(f"Results will be saved to: {self.save_dir}")
        
        if torch.cuda.is_available():
            torch.backends.cudnn.benchmark = True
            if MIXED_PRECISION_AVAILABLE:
                self.scaler = GradScaler()
                self.use_amp = True
            else:
                self.use_amp = False
        else:
            self.use_amp = False
        
        print(f"Using device: {self.device}")
        if torch.cuda.is_available():
            print(f"GPU: {torch.cuda.get_device_name()}")
            print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    
    def create_model(self, architecture):
        """Create a pretrained timm model with frozen early layers and a new head.

        Args:
            architecture: timm model name (e.g. ``'resnet18'``).

        Returns:
            The model, moved to ``self.device``. Its classifier is replaced by
            ``Dropout(0.3) -> Linear(in_features, num_classes)``.
        """
        model = timm.create_model(architecture, pretrained=True, num_classes=self.num_classes)

        # Pick the rule that decides which parameters are frozen.
        if 'resnet' in architecture:
            frozen_prefixes = ('conv1', 'bn1', 'layer1', 'layer2', 'layer3')

            def should_freeze(name):
                return name.startswith(frozen_prefixes)
        elif 'efficientnet' in architecture:
            def should_freeze(name):
                return 'blocks.0' in name or 'blocks.1' in name or 'blocks.2' in name
        elif 'mobilenet' in architecture:
            # timm's MobileNetV3 names its stem `conv_stem`/`bn1` and its
            # stages `blocks.N`. The previous `features.N` patterns are
            # torchvision naming and matched nothing, so no layer was frozen.
            mobilenet_prefixes = ('conv_stem', 'bn1', 'blocks.0.', 'blocks.1.')

            def should_freeze(name):
                return name.startswith(mobilenet_prefixes)
        else:
            should_freeze = None

        if should_freeze is not None:
            frozen_count = 0
            for name, param in model.named_parameters():
                if should_freeze(name):
                    param.requires_grad = False
                    frozen_count += 1
            if frozen_count == 0:
                # Make a naming mismatch visible instead of silently training everything.
                print(f"  Warning: freeze rule matched no parameters for {architecture}")

        # Replace the head; the new Linear is freshly initialised and trainable.
        if hasattr(model, 'fc'):
            in_features = model.fc.in_features
            model.fc = nn.Sequential(
                nn.Dropout(0.3),
                nn.Linear(in_features, self.num_classes)
            )
        elif hasattr(model, 'classifier'):
            if hasattr(model.classifier, 'in_features'):
                in_features = model.classifier.in_features
                model.classifier = nn.Sequential(
                    nn.Dropout(0.3),
                    nn.Linear(in_features, self.num_classes)
                )
            else:
                # Classifier is a container: swap only its last layer.
                in_features = model.classifier[-1].in_features
                model.classifier[-1] = nn.Sequential(
                    nn.Dropout(0.3),
                    nn.Linear(in_features, self.num_classes)
                )

        return model.to(self.device)
    
    def train_single_model(self, train_loader, val_loader, architecture):
        model = self.create_model(architecture)
        
        criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
        
        backbone_params = []
        classifier_params = []
        
        for name, param in model.named_parameters():
            if param.requires_grad:
                if 'fc' in name or 'classifier' in name:
                    classifier_params.append(param)
                else:
                    backbone_params.append(param)
        
        optimizer = optim.AdamW([
            {'params': backbone_params, 'lr': 0.0003},
            {'params': classifier_params, 'lr': 0.001}
        ], weight_decay=0.03)
        
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=80, eta_min=1e-6)
        
        max_epochs = 80
        patience = 20
        best_val_acc = 0.0
        patience_counter = 0
        best_state = None
        
        training_history = {
            'train_accs': [],
            'val_accs': [],
            'train_losses': [],
            'val_losses': []
        }
        
        for epoch in range(max_epochs):
            model.train()
            train_correct = 0
            train_total = 0
            train_loss_total = 0.0
            train_batches = 0
            
            for batch_idx, (images, labels) in enumerate(train_loader):
                images, labels = images.to(self.device), labels.to(self.device)
                optimizer.zero_grad()
                
                if self.use_amp:
                    with autocast():
                        outputs = model(images)
                        loss = criterion(outputs, labels)
                    self.scaler.scale(loss).backward()
                    self.scaler.step(optimizer)
                    self.scaler.update()
                else:
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()
                
                _, predicted = torch.max(outputs, 1)
                train_total += labels.size(0)
                train_correct += (predicted == labels).sum().item()
                train_loss_total += loss.item()
                train_batches += 1
                
                if batch_idx % 10 == 0 and torch.cuda.is_available():
                    torch.cuda.empty_cache()
            
            train_acc = 100 * train_correct / train_total
            train_loss = train_loss_total / train_batches
            
            model.eval()
            val_correct = 0
            val_total = 0
            val_loss_total = 0.0
            val_batches = 0
            
            with torch.no_grad():
                for batch_idx, (images, labels) in enumerate(val_loader):
                    images, labels = images.to(self.device), labels.to(self.device)
                    
                    if self.use_amp:
                        with autocast():
                            outputs = model(images)
                            loss = criterion(outputs, labels)
                    else:
                        outputs = model(images)
                        loss = criterion(outputs, labels)
                    
                    _, predicted = torch.max(outputs, 1)
                    val_total += labels.size(0)
                    val_correct += (predicted == labels).sum().item()
                    val_loss_total += loss.item()
                    val_batches += 1
                    
                    if batch_idx % 5 == 0 and torch.cuda.is_available():
                        torch.cuda.empty_cache()
            
            val_acc = 100 * val_correct / val_total
            val_loss = val_loss_total / val_batches
            scheduler.step()
            
            training_history['train_accs'].append(train_acc)
            training_history['val_accs'].append(val_acc)
            training_history['train_losses'].append(train_loss)
            training_history['val_losses'].append(val_loss)
            
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_counter = 0
                best_state = model.state_dict().copy()
            else:
                patience_counter += 1
            
            if patience_counter >= patience:
                break
            
            if epoch % 10 == 0 and torch.cuda.is_available():
                torch.cuda.empty_cache()
        
        if best_state is not None:
            model.load_state_dict(best_state)
        
        return model, best_val_acc, training_history, architecture
    
    def train_single_fold(self, train_loader, val_loader, fold_idx):
        best_model = None
        best_acc = 0.0
        best_history = None
        best_arch = None
        
        for arch in AVAILABLE_ARCHITECTURES:
            try:
                print(f"  Testing {arch}...")
                model, val_acc, history, architecture = self.train_single_model(train_loader, val_loader, arch)
                
                if val_acc > best_acc:
                    best_acc = val_acc
                    best_model = model
                    best_history = history
                    best_arch = architecture
                
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    
            except Exception as e:
                print(f"  Failed to train {arch}: {e}")
                continue
        
        return best_model, best_acc, best_history, best_arch
    
    def save_fold_immediately(self, model, fold_num, architecture, cv_score, label_mapping, history):
        model_path = os.path.join(self.save_dir, f"{architecture}_fold_{fold_num}_{cv_score:.1f}pct.pth")
        torch.save({
            'model_state_dict': model.state_dict(),
            'model_architecture': architecture,
            'fold': fold_num,
            'cv_score': cv_score,
            'num_classes': self.num_classes,
            'label_mapping': label_mapping,
            'input_size': IMAGE_SIZE,
            'training_history': history
        }, model_path)
        
        print(f"  Saved fold {fold_num} to: {model_path}")
        return model_path
    
    def train_ensemble_academic(self, X_train, y_train, label_mapping):
        """Run stratified K-fold training and keep the best model of each fold.

        The label mapping and the fold index splits are written to
        ``self.save_dir`` before training starts. For each fold, only the
        training part is augmented; validation images are the untouched
        originals, fetched with the dataset's random flip enabled.

        Args:
            X_train: Numpy array of training images.
            y_train: Numpy array of integer labels aligned with ``X_train``.
            label_mapping: Mapping from age value to class index.

        Returns:
            ``(trained_models, cv_scores, training_histories, architectures_used)``,
            one entry per fold that produced a model.
        """
        splitter = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=42)
        fold_splits = list(splitter.split(X_train, y_train))

        # Persist the mapping and the exact splits.
        mapping_file = os.path.join(self.save_dir, "label_mapping.json")
        with open(mapping_file, 'w') as handle:
            json.dump(label_mapping, handle, indent=2)

        splits_file = os.path.join(self.save_dir, "fold_splits.pkl")
        with open(splits_file, 'wb') as handle:
            pickle.dump(fold_splits, handle)

        trained_models, cv_scores = [], []
        training_histories, architectures_used = [], []

        for fold_num, (train_idx, val_idx) in enumerate(fold_splits, start=1):
            print(f"Training Fold {fold_num}/{NUM_FOLDS}")

            # Augment the training part of the fold only.
            X_train_aug, y_train_aug = create_augmented_data(X_train[train_idx], y_train[train_idx])

            train_dataset = OptimizedDataset(X_train_aug, y_train_aug)
            val_dataset = OptimizedDataset(X_train[val_idx], y_train[val_idx], test_time_aug=True)

            batch_size = 16 if torch.cuda.is_available() else 8
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
            val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

            model, val_acc, history, best_arch = self.train_single_fold(train_loader, val_loader, fold_num)

            # A fold where every architecture failed contributes nothing.
            if model is not None:
                self.save_fold_immediately(model, fold_num, best_arch, val_acc, label_mapping, history)

                trained_models.append(model)
                cv_scores.append(val_acc)
                training_histories.append(history)
                architectures_used.append(best_arch)
                print(f"Fold {fold_num}/{NUM_FOLDS} completed: {val_acc:.1f}% with {best_arch}")

            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        return trained_models, cv_scores, training_histories, architectures_used
    
    def evaluate_ensemble(self, models, cv_scores, test_loader):
        scores_array = np.array(cv_scores)
        weights = np.exp(scores_array / 20)
        weights = weights / weights.sum()
        
        for model in models:
            model.eval()
        
        test_correct = 0
        test_total = 0
        all_ensemble_probs = []
        all_labels = []
        
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(self.device), labels.to(self.device)
                
                ensemble_outputs = torch.zeros(images.size(0), self.num_classes).to(self.device)
                
                for model, weight in zip(models, weights):
                    outputs1 = model(images)
                    flipped = torch.flip(images, [3])
                    outputs2 = model(flipped)
                    avg_outputs = (outputs1 + outputs2) / 2
                    
                    ensemble_outputs += weight * F.softmax(avg_outputs, dim=1)
                
                _, predicted = torch.max(ensemble_outputs, 1)
                
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()
                
                all_ensemble_probs.extend(ensemble_outputs.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        ensemble_acc = 100 * test_correct / test_total
        
        return ensemble_acc, np.array(all_ensemble_probs), np.array(all_labels)

def save_final_ensemble(trainer, models, cv_scores, label_mapping, ensemble_acc, training_histories, architectures_used):
    """Write the ensemble checkpoint, training histories and run metadata.

    Three files are written to ``trainer.save_dir``: ``academic_ensemble.pth``
    (all model weights plus scores), ``training_histories.pkl`` and
    ``metadata.json``.

    Returns:
        The directory the files were written to.
    """
    out_dir = trainer.save_dir

    # 1) Weights of every ensemble member in one checkpoint.
    ensemble_path = os.path.join(out_dir, "academic_ensemble.pth")
    checkpoint = {
        'model_state_dicts': [member.state_dict() for member in models],
        'architectures_used': architectures_used,
        'cv_scores': cv_scores,
        'ensemble_score': ensemble_acc,
        'num_classes': len(label_mapping),
        'label_mapping': label_mapping,
        'input_size': IMAGE_SIZE
    }
    torch.save(checkpoint, ensemble_path)

    # 2) Per-fold training curves.
    histories_path = os.path.join(out_dir, "training_histories.pkl")
    with open(histories_path, 'wb') as handle:
        pickle.dump(training_histories, handle)

    # 3) Human-readable summary of the run.
    height, width = IMAGE_SIZE[0], IMAGE_SIZE[1]
    metadata = {
        'timestamp': datetime.now().strftime("%Y%m%d_%H%M%S"),
        'architectures_used': architectures_used,
        'num_folds': len(models),
        'cv_scores': cv_scores,
        'cv_mean': float(np.mean(cv_scores)),
        'cv_std': float(np.std(cv_scores)),
        'ensemble_score': ensemble_acc,
        'label_mapping': label_mapping,
        'input_size': f'{width}x{height}',
        'augmentation_target': AUGMENTATION_TARGET,
        'completed': True,
        'academic_split': True
    }
    metadata_path = os.path.join(out_dir, "metadata.json")
    with open(metadata_path, 'w') as handle:
        json.dump(metadata, handle, indent=2)

    print(f"Final ensemble saved to: {ensemble_path}")
    return out_dir

def plot_training_curves(training_histories, architectures_used, save_dir):
    """Save per-fold accuracy and loss curves under ``save_dir/training_plots``.

    Two figures are written, ``training_curves.png`` (accuracy) and
    ``loss_curves.png`` (loss), each with one panel per fold.
    """
    plots_dir = os.path.join(save_dir, "training_plots")
    Path(plots_dir).mkdir(exist_ok=True)

    # (train key, val key, train label, val label, y label, title tag, file name)
    figure_specs = (
        ('train_accs', 'val_accs', 'Training', 'Validation', 'Accuracy (%)', '', "training_curves.png"),
        ('train_losses', 'val_losses', 'Training Loss', 'Validation Loss', 'Loss', ' Loss', "loss_curves.png"),
    )

    for train_key, val_key, train_label, val_label, y_label, title_tag, file_name in figure_specs:
        fig, axes = plt.subplots(1, NUM_FOLDS, figsize=(4*NUM_FOLDS, 4))
        # With a single fold, subplots returns one Axes rather than an array.
        if NUM_FOLDS == 1:
            axes = [axes]

        for fold, (history, arch) in enumerate(zip(training_histories, architectures_used)):
            ax = axes[fold]
            epochs = range(1, len(history[train_key]) + 1)

            ax.plot(epochs, history[train_key], 'b-', label=train_label, linewidth=2, alpha=0.8)
            ax.plot(epochs, history[val_key], 'r-', label=val_label, linewidth=2, alpha=0.8)

            ax.set_xlabel('Epoch')
            ax.set_ylabel(y_label)
            ax.set_title(f'Fold {fold + 1}{title_tag} ({arch})')
            ax.legend()
            ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig(os.path.join(plots_dir, file_name), dpi=300, bbox_inches='tight')
        plt.close()

def main():
    """Run the full pipeline: load, split, cross-validate, evaluate and save.

    20% of the data is held out as a stratified test set before any training.
    Cross-validation and model selection run on the remaining 80% only, and
    the resulting ensemble is scored once on the held-out set.

    Returns:
        A dict with ``models``, ``cv_scores``, ``test_accuracy``,
        ``architectures_used`` and ``save_directory``. Returns None if any
        step raised; the error and traceback are printed instead.
    """
    print("Academically Rigorous Multi-Architecture Ensemble Training")
    print("=" * 80)
    print(f"Available architectures: {AVAILABLE_ARCHITECTURES}")
    print(f"Number of folds: {NUM_FOLDS}")
    print(f"Image size: {IMAGE_SIZE[1]}x{IMAGE_SIZE[0]}")
    print(f"Augmentation target: {AUGMENTATION_TARGET} samples per class")

    start_time = time.time()

    try:
        # Seed every RNG used by augmentation, shuffling and weight
        # initialisation. The value matches the random_state of the splits.
        seed = 42
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

        print("Loading data...")
        images, ages = load_original_data()

        # Map each distinct age to a class index, in ascending age order.
        unique_ages = sorted(list(set(ages)))
        label_mapping = {age: i for i, age in enumerate(unique_ages)}
        y_indices = np.array([label_mapping[age] for age in ages])

        print(f"Total images: {len(images)}")
        print(f"Age distribution: {dict(Counter(ages))}")

        print("\n--- ACADEMIC TRAIN/TEST SPLIT ---")
        X_train_all, X_test_final, y_train_all, y_test_final = train_test_split(
            images, y_indices, test_size=0.2, random_state=42, stratify=y_indices
        )

        print(f"Training data: {len(X_train_all)} images")
        print(f"Test data: {len(X_test_final)} images")
        print("Test set will NOT be used until final evaluation")

        trainer = MultiArchEnsembleTrainer(num_classes=len(unique_ages))

        print("\n--- CROSS-VALIDATION ON TRAINING DATA ONLY ---")
        models, cv_scores, training_histories, architectures_used = trainer.train_ensemble_academic(
            X_train_all, y_train_all, label_mapping
        )

        # Without at least one trained model there is nothing to evaluate.
        if not models:
            raise RuntimeError("No fold produced a trained model; cannot evaluate the ensemble")

        print("\n--- FINAL EVALUATION ON HELD-OUT TEST SET ---")
        test_dataset = OptimizedDataset(X_test_final, y_test_final, test_time_aug=True)
        batch_size = 32 if torch.cuda.is_available() else 8
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

        ensemble_acc, _, _ = trainer.evaluate_ensemble(models, cv_scores, test_loader)

        print("Saving final ensemble...")
        save_dir = save_final_ensemble(trainer, models, cv_scores, label_mapping, ensemble_acc, training_histories, architectures_used)

        print("Generating training curves...")
        plot_training_curves(training_histories, architectures_used, save_dir)

        elapsed = (time.time() - start_time) / 60

        print("\n=== ACADEMIC RESULTS ===")
        print("Cross-Validation Results (Training Data Only):")
        for i, (score, arch) in enumerate(zip(cv_scores, architectures_used)):
            print(f"Fold {i+1}: {score:.1f}% ({arch})")
        print(f"CV Mean: {np.mean(cv_scores):.1f}% ± {np.std(cv_scores):.1f}%")
        print(f"\nFinal Test Accuracy (Held-Out Data): {ensemble_acc:.1f}%")
        print(f"Training Time: {elapsed:.1f} minutes")
        print(f"All results saved to: {save_dir}")

        print("\n=== OVERFITTING ANALYSIS ===")
        print("Check training_plots/ for loss curves to verify no overfitting")

        return {
            'models': models,
            'cv_scores': cv_scores,
            'test_accuracy': ensemble_acc,
            'architectures_used': architectures_used,
            'save_directory': save_dir
        }

    except Exception as e:
        # Best effort for an interactive notebook: report and return None.
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()

Academically Rigorous Multi-Architecture Ensemble Training
Available architectures: ['resnet18', 'resnet34', 'resnet50', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'mobilenetv3_large_100']
Number of folds: 5
Image size: 448x224
Augmentation target: 1000 samples per class
Loading data...
Total images: 243
Age distribution: {0.5: 39, 2.5: 33, 3.5: 29, 1.5: 62, 4.5: 20, 5.5: 60}

--- ACADEMIC TRAIN/TEST SPLIT ---
Training data: 194 images
Test data: 49 images
Test set will NOT be used until final evaluation
Results will be saved to: academic_ensemble_20250721_075823
Using device: cuda
GPU: NVIDIA GeForce RTX 2060
GPU Memory: 6.0 GB

--- CROSS-VALIDATION ON TRAINING DATA ONLY ---
Training Fold 1/5
  Testing resnet18...
