In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import timm
import numpy as np
import cv2
import random
import time
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

def load_original_data(fpath=None):
    """Load the deer images and collapse the oldest ages into a single class.

    Args:
        fpath: Glob pattern passed to ``buck.analysis.basics.ingest_images``.
            ``None`` (the default) falls back to the location this notebook
            was originally written against, so existing callers are unaffected.

    Returns:
        A ``(images, ages_grouped)`` tuple: ``images`` exactly as returned by
        ``ingest_images`` and ``ages_grouped`` a list in which every age of
        5.5 or more has been replaced by 5.5.
    """
    print("📂 LOADING ORIGINAL DATA")
    print("="*50)

    # Project-local loader, imported lazily so the rest of the file can be
    # imported without the ``buck`` package being installed.
    from buck.analysis.basics import ingest_images

    if fpath is None:
        # Machine-specific default kept for backward compatibility.
        fpath = "C:\\Users\\aaron\\Dropbox\\AI Projects\\buck\\images\\squared\\color\\*.png"
    images, ages = ingest_images(fpath)
    print(f"   ✅ Loaded {len(images)} original images")

    # Group ages: everything from 5.5 upwards becomes 5.5. The original
    # author expected this to leave exactly 5 classes; that depends on the
    # ages actually present in the data.
    print("   🔄 Grouping ages: 5.5+ → 5.5")
    ages_grouped = [5.5 if age >= 5.5 else age for age in ages]

    # Show the age distribution before and after grouping.
    print(f"   📊 Original age distribution: {dict(Counter(ages))}")
    print(f"   📊 Grouped age distribution: {dict(Counter(ages_grouped))}")

    return images, ages_grouped

def create_train_val_test_split(images, ages, test_size=0.2, val_size=0.15, random_state=42):
    """Split the data into train/validation/test sets and encode ages as class indices.

    The test set is carved off first, then the validation set is taken from
    what remains. Each split is stratified when every class has at least two
    samples at that stage; otherwise that split falls back to a plain random
    shuffle.

    Args:
        images: Sequence or array of images (converted with ``np.asarray``).
        ages: Sequence or array of age labels, one per image.
        test_size: Fraction of the full data set reserved for testing.
        val_size: Fraction of the full data set reserved for validation.
        random_state: Seed forwarded to both ``train_test_split`` calls.

    Returns:
        ``(X_train, y_train_indices, X_val, y_val_indices, X_test,
        y_test_indices, label_mapping, reverse_mapping)`` where the ``y_*``
        arrays hold class indices, ``label_mapping`` maps age -> index and
        ``reverse_mapping`` maps index -> age.

    Raises:
        ValueError: If ``ages`` is empty.
    """
    print("\n🔀 CREATING TRAIN/VAL/TEST SPLIT")
    print("="*50)

    images = np.asarray(images)
    ages = np.asarray(ages)

    age_counts = Counter(ages)
    if not age_counts:
        raise ValueError("Cannot split an empty data set")

    # Stratification needs at least two samples of every class.
    min_count = min(age_counts.values())
    can_stratify = min_count >= 2

    print(f"   📊 Age distribution: {dict(age_counts)}")
    print(f"   📊 Minimum class size: {min_count}")
    print(f"   🎯 Can use stratified split: {can_stratify}")

    if not can_stratify:
        print("   ⚠️ Using random split (some classes too small for stratification)")

    # First split: separate the test set.
    X_temp, X_test, y_temp, y_test = train_test_split(
        images, ages, test_size=test_size, random_state=random_state,
        stratify=ages if can_stratify else None,
    )

    # Second split: validation out of the remainder. val_size is expressed as
    # a fraction of the full data set, so rescale it to the remaining part.
    val_size_adjusted = val_size / (1 - test_size)

    # The test split can leave a small class with a single sample, in which
    # case a stratified second split would raise; re-check before stratifying.
    can_stratify_val = can_stratify and min(Counter(y_temp).values()) >= 2
    if can_stratify and not can_stratify_val:
        print("   ⚠️ Validation split is random (a class has fewer than 2 samples left)")

    X_train, X_val, y_train, y_val = train_test_split(
        X_temp, y_temp, test_size=val_size_adjusted, random_state=random_state,
        stratify=y_temp if can_stratify_val else None,
    )

    # Class indices follow ascending age order.
    unique_ages = sorted(set(ages))
    label_mapping = {age: i for i, age in enumerate(unique_ages)}
    reverse_mapping = {i: age for age, i in label_mapping.items()}

    print(f"   📊 Train: {len(X_train)} samples")
    print(f"   📊 Val: {len(X_val)} samples") 
    print(f"   📊 Test: {len(X_test)} samples")
    print(f"   🏷️ Label mapping: {label_mapping}")
    print(f"   🎯 Number of classes: {len(unique_ages)}")

    # Convert ages to class indices.
    y_train_indices = np.array([label_mapping[age] for age in y_train])
    y_val_indices = np.array([label_mapping[age] for age in y_val])
    y_test_indices = np.array([label_mapping[age] for age in y_test])

    print(f"   📈 Train distribution: {Counter(y_train_indices)}")
    print(f"   📈 Val distribution: {Counter(y_val_indices)}")
    print(f"   📈 Test distribution: {Counter(y_test_indices)}")

    return (X_train, y_train_indices, X_val, y_val_indices, X_test, y_test_indices, 
            label_mapping, reverse_mapping)

def augment_image(image):
    """Return a randomly perturbed copy of ``image``.

    Four independent perturbations are tried in a fixed order, each behind
    its own random draw: rotation by up to ±15 degrees (50%), horizontal
    flip (50%), contrast/brightness change (50%) and additive Gaussian
    noise (30%). The input is cast to ``uint8`` first if it is not already.
    """
    img = image if image.dtype == np.uint8 else image.astype(np.uint8)

    # Rotation about the (integer) image centre, keeping the frame size.
    if random.random() < 0.5:
        degrees = random.uniform(-15, 15)
        height, width = img.shape[:2]
        rotation = cv2.getRotationMatrix2D((width // 2, height // 2), degrees, 1.0)
        img = cv2.warpAffine(img, rotation, (width, height))

    # Mirror left-right.
    if random.random() < 0.5:
        img = cv2.flip(img, 1)

    # Contrast (alpha) and brightness (beta) adjustment.
    if random.random() < 0.5:
        contrast = random.uniform(0.8, 1.2)
        brightness = random.randint(-20, 20)
        img = cv2.convertScaleAbs(img, alpha=contrast, beta=brightness)

    # Gaussian noise; the sum is done in int16 so negative noise and values
    # above 255 are clipped instead of wrapping around in uint8.
    if random.random() < 0.3:
        jitter = np.random.normal(0, 5, img.shape).astype(np.int16)
        widened = img.astype(np.int16) + jitter
        img = np.clip(widened, 0, 255).astype(np.uint8)

    return img

def balance_and_augment_data(X_train, y_train, augment_multiplier=30, num_classes=5):
    """Oversample every class to the same size using random augmentation.

    The target size per class is ``largest class size * augment_multiplier``.
    For each class the original samples are kept and the remainder is filled
    with augmented copies of randomly chosen originals from that class.

    Args:
        X_train: Training images; converted with ``np.asarray``.
        y_train: Class indices aligned with ``X_train`` (array or list).
        augment_multiplier: Factor applied to the largest class size to get
            the per-class target.
        num_classes: Classes ``0 .. num_classes - 1`` are processed. A class
            with no training samples is skipped with a warning, since there
            is nothing to augment from.

    Returns:
        ``(X_augmented, y_augmented)`` as NumPy arrays, grouped by class.

    Raises:
        ValueError: If ``y_train`` is empty.
    """
    print(f"\n🔄 BALANCING AND AUGMENTING DATA")
    print("="*50)
    print(f"   🎯 Target: {augment_multiplier}x augmentation per class")

    # Count samples per class.
    class_counts = Counter(y_train)
    print(f"   📊 Original distribution: {dict(class_counts)}")
    if not class_counts:
        raise ValueError("y_train is empty; nothing to balance")

    # Boolean-mask indexing below needs real arrays, not lists.
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    # Target count is based on the largest class times the multiplier.
    target_count = max(class_counts.values()) * augment_multiplier
    print(f"   🎯 Target samples per class: {target_count}")

    X_augmented = []
    y_augmented = []

    for class_idx in range(num_classes):
        class_mask = y_train == class_idx
        class_images = X_train[class_mask]
        current_count = len(class_images)

        if current_count == 0:
            # No source image to augment from; previously this crashed in
            # random.randint(0, -1).
            print(f"   ⚠️ Class {class_idx}: no training samples, skipping")
            continue

        print(f"   📈 Class {class_idx}: {current_count} → {target_count} samples")

        # Keep the untouched originals.
        X_augmented.extend(class_images)
        y_augmented.extend(y_train[class_mask])

        # Fill up to the target with augmented copies of random originals.
        for _ in range(target_count - current_count):
            source = class_images[random.randint(0, current_count - 1)].copy()
            X_augmented.append(augment_image(source))
            y_augmented.append(class_idx)

    X_augmented = np.array(X_augmented)
    y_augmented = np.array(y_augmented)

    print(f"   ✅ Augmentation complete: {len(X_augmented)} total samples")
    print(f"   📊 Final distribution: {Counter(y_augmented)}")

    return X_augmented, y_augmented

class DeerDataset(Dataset):
    """In-memory image dataset that yields ImageNet-normalised 3x224x224 tensors.

    Args:
        X: Images as an array or sequence, either channels-last (H, W, 3) or
            channels-first per sample.
        y: Integer class labels, one per image.
        transform: Stored on the instance but not used by this class.

    Pixel scaling is decided once for the whole dataset: if any value in
    ``X`` exceeds 1.0 the data is treated as 0-255 and every sample is
    divided by 255. Deciding per sample would leave a very dark 0-255 image
    (maximum value <= 1) unscaled.
    """

    def __init__(self, X, y, transform=True):
        self.X = torch.tensor(np.asarray(X), dtype=torch.float32)
        self.y = torch.tensor(np.asarray(y), dtype=torch.long)
        self.transform = transform

        # Dataset-wide divisor that brings pixel values into [0, 1].
        has_large_values = self.X.numel() > 0 and self.X.max().item() > 1.0
        self._scale = 255.0 if has_large_values else 1.0

        # ImageNet channel statistics, built once rather than on every access.
        self._mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
        self._std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``."""
        # The division allocates a new tensor, so self.X is never modified.
        image = self.X[idx] / self._scale
        label = self.y[idx].clone()

        # Ensure CHW format (channels first).
        if image.dim() == 3 and image.shape[-1] == 3:
            image = image.permute(2, 0, 1)

        # Resize to 224x224; interpolate expects a leading batch dimension.
        if tuple(image.shape[-2:]) != (224, 224):
            image = F.interpolate(
                image.unsqueeze(0), size=(224, 224), mode='bilinear', align_corners=False
            ).squeeze(0)

        # ImageNet normalization.
        image = (image - self._mean) / self._std

        return image, label

class UltraAggressiveTrainer:
    """Train and compare many timm architectures under several freezing strategies.

    Each architecture is fine-tuned with up to three strategies (frozen
    backbone, partial freeze, fully unfrozen); the best validation-accuracy
    weights of each run are restored before the test set is scored.
    """

    # Substrings used to recognise classifier-head parameters by name.
    # NOTE(review): this is a plain substring match, so any parameter whose
    # name merely contains one of these (e.g. a backbone layer called "fc1")
    # is treated as part of the head - TODO confirm per architecture.
    _HEAD_NAME_MARKERS = ('classifier', 'head', 'fc')

    # strategy -> (learning rate, max epochs, patience)
    _STRATEGY_HPARAMS = {
        'frozen': (0.01, 250, 100),
        'partial': (0.005, 250, 100),
    }
    # Used for 'unfrozen' and for any unrecognised strategy name.
    _DEFAULT_HPARAMS = (0.001, 250, 100)

    def __init__(self, num_classes=5):
        """Select the device (CUDA when available) and remember the class count."""
        self.num_classes = num_classes
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print("🔥 ULTRA AGGRESSIVE TRAINER")
        print(f"   Device: {self.device}")
        print(f"   Classes: {num_classes}")

    def get_all_architectures(self):
        """Return the catalogue of architectures to try.

        Returns:
            Dict mapping a display name to ``{'model_name': <timm id>,
            'family': <family label>}``. A per-family summary is printed as a
            side effect.
        """
        architectures = {
            # EfficientNet B0-B7 (COMPLETE SERIES)
            'EfficientNet-B0': {'model_name': 'efficientnet_b0', 'family': 'EfficientNet'},
            'EfficientNet-B1': {'model_name': 'efficientnet_b1', 'family': 'EfficientNet'},
            'EfficientNet-B2': {'model_name': 'efficientnet_b2', 'family': 'EfficientNet'},
            'EfficientNet-B3': {'model_name': 'efficientnet_b3', 'family': 'EfficientNet'},
            'EfficientNet-B4': {'model_name': 'efficientnet_b4', 'family': 'EfficientNet'},
            'EfficientNet-B5': {'model_name': 'efficientnet_b5', 'family': 'EfficientNet'},
            'EfficientNet-B6': {'model_name': 'efficientnet_b6', 'family': 'EfficientNet'},
            'EfficientNet-B7': {'model_name': 'efficientnet_b7', 'family': 'EfficientNet'},

            # EfficientNetV2
            'EfficientNetV2-S': {'model_name': 'efficientnetv2_s', 'family': 'EfficientNetV2'},
            'EfficientNetV2-M': {'model_name': 'efficientnetv2_m', 'family': 'EfficientNetV2'},
            'EfficientNetV2-L': {'model_name': 'efficientnetv2_l', 'family': 'EfficientNetV2'},

            # DenseNet Family
            'DenseNet-121': {'model_name': 'densenet121', 'family': 'DenseNet'},
            'DenseNet-161': {'model_name': 'densenet161', 'family': 'DenseNet'},
            'DenseNet-169': {'model_name': 'densenet169', 'family': 'DenseNet'},
            'DenseNet-201': {'model_name': 'densenet201', 'family': 'DenseNet'},

            # ResNet Family
            'ResNet-34': {'model_name': 'resnet34', 'family': 'ResNet'},
            'ResNet-50': {'model_name': 'resnet50', 'family': 'ResNet'},
            'ResNet-101': {'model_name': 'resnet101', 'family': 'ResNet'},
            'ResNet-152': {'model_name': 'resnet152', 'family': 'ResNet'},

            # ResNeXt
            'ResNeXt-50': {'model_name': 'resnext50_32x4d', 'family': 'ResNeXt'},
            'ResNeXt-101': {'model_name': 'resnext101_32x8d', 'family': 'ResNeXt'},

            # Wide ResNet
            'Wide-ResNet-50': {'model_name': 'wide_resnet50_2', 'family': 'Wide-ResNet'},
            'Wide-ResNet-101': {'model_name': 'wide_resnet101_2', 'family': 'Wide-ResNet'},

            # MobileNet Family
            'MobileNetV2': {'model_name': 'mobilenetv2_100', 'family': 'MobileNet'},
            'MobileNetV3-Small': {'model_name': 'mobilenetv3_small_100', 'family': 'MobileNet'},
            'MobileNetV3-Large': {'model_name': 'mobilenetv3_large_100', 'family': 'MobileNet'},

            # RegNet Family
            'RegNetX-400MF': {'model_name': 'regnetx_400mf', 'family': 'RegNet'},
            'RegNetX-800MF': {'model_name': 'regnetx_800mf', 'family': 'RegNet'},
            'RegNetY-400MF': {'model_name': 'regnety_400mf', 'family': 'RegNet'},
            'RegNetY-800MF': {'model_name': 'regnety_800mf', 'family': 'RegNet'},

            # ConvNeXt
            'ConvNeXt-Tiny': {'model_name': 'convnext_tiny', 'family': 'ConvNeXt'},
            'ConvNeXt-Small': {'model_name': 'convnext_small', 'family': 'ConvNeXt'},
            'ConvNeXt-Base': {'model_name': 'convnext_base', 'family': 'ConvNeXt'},

            # Vision Transformer variants
            'Swin-Tiny': {'model_name': 'swin_tiny_patch4_window7_224', 'family': 'Swin'},
            'Swin-Small': {'model_name': 'swin_small_patch4_window7_224', 'family': 'Swin'},

            # VGG (classic)
            'VGG-16': {'model_name': 'vgg16', 'family': 'VGG'},
            'VGG-19': {'model_name': 'vgg19', 'family': 'VGG'},
        }

        print(f"\n🏗️ ULTRA AGGRESSIVE ARSENAL ({len(architectures)} models)")
        print("="*80)

        # Group display names by family, keeping first-seen order.
        families = {}
        for arch_name, arch_info in architectures.items():
            families.setdefault(arch_info['family'], []).append(arch_name)

        for family, models in families.items():
            print(f"📁 {family} ({len(models)} models): {', '.join(models)}")

        return architectures

    @staticmethod
    def _is_head_param(name):
        """Return True when a parameter name looks like it belongs to the classifier head."""
        return any(marker in name for marker in UltraAggressiveTrainer._HEAD_NAME_MARKERS)

    def create_model(self, arch_name, model_name, freeze_strategy='none'):
        """Build a pretrained timm model and apply a freezing strategy.

        Args:
            arch_name: Display name, used only in progress output.
            model_name: Identifier passed to ``timm.create_model``.
            freeze_strategy: ``'backbone'`` freezes every parameter not
                recognised as head; ``'partial'`` freezes non-head parameters
                among the first 70% of the parameter list; anything else
                leaves all parameters trainable.

        Returns:
            The model moved to ``self.device``, or ``None`` if creation
            failed for any reason (the error is printed, truncated).
        """
        try:
            print(f"      🔧 Creating {arch_name}...")
            model = timm.create_model(model_name, pretrained=True, num_classes=self.num_classes)

            if freeze_strategy == 'backbone':
                print("         🧊 Freezing backbone layers...")
                for name, param in model.named_parameters():
                    if not self._is_head_param(name):
                        param.requires_grad = False

            elif freeze_strategy == 'partial':
                print("         ❄️ Partial freeze (last 2 blocks unfrozen)...")
                named_params = list(model.named_parameters())
                # Freeze the first 70% of parameter tensors (by list position).
                freeze_until = int(len(named_params) * 0.7)
                for name, param in named_params[:freeze_until]:
                    if not self._is_head_param(name):
                        param.requires_grad = False

            total_params = sum(p.numel() for p in model.parameters())
            if freeze_strategy in ('backbone', 'partial'):
                trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
                print(f"         ✅ Loaded: {total_params:,} total, {trainable_params:,} trainable")
            else:  # no freezing
                print(f"         ✅ Loaded: {total_params:,} parameters (all trainable)")

            return model.to(self.device)

        except Exception as e:
            # Deliberately best-effort: one unavailable architecture must not
            # abort the whole sweep.
            print(f"         ❌ Failed: {str(e)[:60]}...")
            return None

    def _accuracy(self, model, loader):
        """Return the top-1 accuracy (in percent) of ``model`` on ``loader``; 0.0 if empty."""
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in loader:
                images, labels = images.to(self.device), labels.to(self.device)
                _, predicted = torch.max(model(images), 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        return 100 * correct / total if total else 0.0

    def ultra_aggressive_training(self, model, arch_name, train_loader, val_loader, test_loader,
                                  strategy='unfrozen', max_epochs=None, early_stopping=False):
        """Train ``model``, restore its best-validation weights and score the test set.

        Args:
            model: Module already placed on ``self.device``.
            arch_name: Display name, used only in progress output.
            train_loader, val_loader, test_loader: Iterables of
                ``(images, labels)`` batches.
            strategy: ``'frozen'``, ``'partial'`` or ``'unfrozen'``; selects
                the learning rate. Unknown values use the unfrozen settings.
            max_epochs: Optional override of the strategy's epoch budget.
            early_stopping: When True, stop after ``patience`` consecutive
                epochs without a validation improvement. Off by default, so
                the full epoch budget is always used.

        Returns:
            ``(best_val_acc, test_acc)`` in percent. ``test_acc`` is measured
            with the weights from the best validation epoch; if validation
            accuracy never rose above 0 the final weights are used.
        """
        print(f"      🔥 ULTRA AGGRESSIVE TRAINING: {arch_name} ({strategy})...")

        criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

        # Strategy-specific hyperparameters.
        lr, default_epochs, patience = self._STRATEGY_HPARAMS.get(strategy, self._DEFAULT_HPARAMS)
        if max_epochs is None:
            max_epochs = default_epochs

        optimizer = optim.AdamW(
            model.parameters(),
            lr=lr,
            weight_decay=0.01,
            betas=(0.9, 0.999)
        )

        # Simple step scheduler: halve the learning rate every 20 epochs.
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

        best_val_acc = 0.0
        best_model_state = None
        patience_counter = 0

        print(f"         📊 ULTRA SETUP: {max_epochs} epochs, LR={lr}, patience={patience}")

        for epoch in range(max_epochs):
            # Training phase
            model.train()
            train_correct = 0
            train_total = 0

            for images, labels in train_loader:
                images, labels = images.to(self.device), labels.to(self.device)

                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()

                # Gradient clipping
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

                optimizer.step()

                _, predicted = torch.max(outputs.detach(), 1)
                train_total += labels.size(0)
                train_correct += (predicted == labels).sum().item()

            train_acc = 100 * train_correct / train_total if train_total else 0.0

            # Validation phase
            val_acc = self._accuracy(model, val_loader)
            scheduler.step()
            current_lr = scheduler.get_last_lr()[0]

            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_counter = 0
                # state_dict() returns references to the live tensors, so each
                # one must be cloned or the snapshot keeps changing with
                # training. Kept on CPU to avoid a second copy on the GPU.
                best_model_state = {
                    key: value.detach().cpu().clone()
                    for key, value in model.state_dict().items()
                }
                improvement = "🔥"
            else:
                patience_counter += 1
                improvement = ""

            # Progress: every 5th epoch, the first 10, the last ones, and any improvement.
            if epoch % 5 == 0 or epoch < 10 or improvement or epoch > max_epochs - 10:
                gap = train_acc - val_acc
                print(f"         Epoch {epoch:3d}: Train {train_acc:.1f}%, Val {val_acc:.1f}% (gap: {gap:+.1f}%), LR: {current_lr:.2e} {improvement}")

            if early_stopping and patience_counter >= patience:
                print(f"         Early stopping at epoch {epoch} (patience={patience})")
                break

        # Restore best model (skipped when no epoch ever improved on 0%).
        if best_model_state is not None:
            model.load_state_dict(best_model_state)

        # Test evaluation
        test_acc = self._accuracy(model, test_loader)

        print(f"         🎯 {arch_name} ({strategy}) FINAL: Val {best_val_acc:.1f}%, Test {test_acc:.1f}%")

        return best_val_acc, test_acc

    def _run_strategy(self, banner, freeze_strategy, strategy, arch_name, model_name,
                      train_loader, val_loader, test_loader):
        """Create, train and release one model; return its result dict or None on failure."""
        print(banner)
        model = self.create_model(arch_name, model_name, freeze_strategy=freeze_strategy)
        if model is None:
            return None

        label = strategy.capitalize()
        try:
            val_acc, test_acc = self.ultra_aggressive_training(
                model, arch_name, train_loader, val_loader, test_loader, strategy=strategy
            )
        except Exception as e:
            # Best-effort: report and move on to the next strategy/architecture.
            print(f"         ❌ {label} strategy failed: {str(e)[:50]}...")
            return None
        finally:
            # Drop the model before the next one is built so models do not
            # pile up in device memory across strategies.
            del model
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        return {
            'name': f"{arch_name}-{label}",
            'strategy': strategy,
            'val_accuracy': val_acc,
            'test_accuracy': test_acc
        }

    def test_architecture_with_multiple_strategies(self, arch_name, model_name, train_loader, val_loader, test_loader):
        """Run the frozen -> partial -> unfrozen progression for one architecture.

        The partial strategy runs only if the frozen run exceeded 35%
        validation accuracy; the unfrozen strategy runs only if any earlier
        run exceeded 45%.

        Returns:
            List of result dicts (``name``, ``strategy``, ``val_accuracy``,
            ``test_accuracy``), one per strategy that completed.
        """
        results = []
        loaders = (train_loader, val_loader, test_loader)

        # Strategy 1: Frozen backbone (fast warmup)
        outcome = self._run_strategy(
            "      🧊 FROZEN BACKBONE STRATEGY:", 'backbone', 'frozen',
            arch_name, model_name, *loaders
        )
        if outcome is not None:
            results.append(outcome)

        # Strategy 2: Partial freeze (if frozen worked reasonably)
        if results and results[-1]['val_accuracy'] > 35:
            outcome = self._run_strategy(
                "      ❄️ PARTIAL FREEZE STRATEGY:", 'partial', 'partial',
                arch_name, model_name, *loaders
            )
            if outcome is not None:
                results.append(outcome)

        # Strategy 3: Full unfrozen (if any earlier strategy worked well)
        if results and max(r['val_accuracy'] for r in results) > 45:
            outcome = self._run_strategy(
                "      🔥 FULL UNFROZEN STRATEGY:", 'none', 'unfrozen',
                arch_name, model_name, *loaders
            )
            if outcome is not None:
                results.append(outcome)

        return results

    def run_ultra_aggressive_test(self, X_train, y_train, X_val, y_val, X_test, y_test):
        """Train every catalogued architecture and print a ranked report.

        Args:
            X_train, y_train: Training images and class indices.
            X_val, y_val: Validation images and class indices.
            X_test, y_test: Test images and class indices.

        Returns:
            List of result dicts sorted by descending test accuracy; each has
            ``name``, ``strategy``, ``val_accuracy``, ``test_accuracy``,
            ``family`` and ``training_time``.
        """
        # NOTE: the epoch/patience figures in this banner do not match the
        # values actually used in ultra_aggressive_training (250 epochs,
        # patience 100, early stopping off by default).
        print("🔥 ULTRA AGGRESSIVE ARCHITECTURE TEST")
        print("="*80)
        print("🎯 ULTRA AGGRESSIVE OPTIMIZATIONS:")
        print("   • 60-100 epochs per strategy (vs 20)")
        print("   • 25-40 patience (vs 10)")
        print("   • Multiple training strategies per architecture")
        print("   • Step LR scheduler (more stable)")
        print("   • Strategy progression (frozen → partial → unfrozen)")
        print("   • NO QUITTING until models reach potential!")
        print("="*80)

        # Create datasets
        train_dataset = DeerDataset(X_train, y_train)
        val_dataset = DeerDataset(X_val, y_val)
        test_dataset = DeerDataset(X_test, y_test)

        # Create data loaders; only the training data is shuffled.
        train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=0)
        val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=0)
        test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=0)

        print(f"📊 Data ready: {len(train_dataset)} train, {len(val_dataset)} val, {len(test_dataset)} test")

        architectures = self.get_all_architectures()

        all_results = []
        total_start_time = time.time()

        print(f"\n🔥 ULTRA AGGRESSIVE TESTING: {len(architectures)} ARCHITECTURES")
        print("="*80)
        print("⏰ WARNING: This will take many hours but WILL find the best performance!")
        print("="*80)

        # NOTE(review): leaderboards below rank by test accuracy, so the
        # winner's reported test score is an optimistic estimate.
        for i, (arch_name, arch_info) in enumerate(architectures.items(), 1):
            print(f"\n[{i}/{len(architectures)}] 🔥 ULTRA AGGRESSIVE {arch_name}")
            print("-" * 70)

            start_time = time.time()

            # Test with multiple strategies
            arch_results = self.test_architecture_with_multiple_strategies(
                arch_name, arch_info['model_name'], train_loader, val_loader, test_loader
            )

            # Add metadata and timing (time is per architecture, not per strategy).
            for result in arch_results:
                result['family'] = arch_info['family']
                result['training_time'] = time.time() - start_time
                all_results.append(result)

            if arch_results:
                best_arch_result = max(arch_results, key=lambda x: x['test_accuracy'])
                print(f"      🏆 Best {arch_name}: {best_arch_result['name']} ({best_arch_result['test_accuracy']:.1f}%)")

            print(f"      ⏱️ Total time for {arch_name}: {time.time() - start_time:.1f}s")

            # Intermediate leaderboard every 3 architectures
            if i % 3 == 0:
                current_best = sorted(all_results, key=lambda x: x['test_accuracy'], reverse=True)[:3]
                print(f"\n📊 CURRENT LEADERBOARD (after {i} architectures):")
                for j, result in enumerate(current_best, 1):
                    print(f"   {j}. {result['name']}: {result['test_accuracy']:.1f}%")
                print()

        total_time = time.time() - total_start_time

        # Sort all results
        all_results.sort(key=lambda x: x['test_accuracy'], reverse=True)

        # Display ultra comprehensive results
        print(f"\n🏆 ULTRA AGGRESSIVE FINAL RESULTS")
        print("="*80)
        print(f"⏰ Total testing time: {total_time/3600:.1f} hours")
        print(f"🎯 Models tested: {len(all_results)}")
        print("="*80)
        print(f"{'Rank':<4} {'Model':<30} {'Strategy':<10} {'Val%':<8} {'Test%':<8} {'vs 54.2%'}")
        print("-" * 80)

        for i, result in enumerate(all_results, 1):
            val_acc = result['val_accuracy']
            test_acc = result['test_accuracy']
            strategy = result['strategy']

            # 54.2% is the previous best this sweep is compared against.
            if test_acc >= 75.0:
                status = "🎉 BREAKTHROUGH!"
            elif test_acc >= 65.0:
                status = "🔥 EXCELLENT!"
            elif test_acc > 54.2:
                status = "🚀 NEW BEST!"
            elif test_acc > 45.0:
                status = "📈 Good"
            else:
                status = "📉 Weak"

            print(f"{i:<4} {result['name']:<30} {strategy:<10} {val_acc:<7.1f} {test_acc:<7.1f} {status}")

        # Analysis
        if all_results:
            best = all_results[0]
            breakthrough_count = sum(1 for r in all_results if r['test_accuracy'] > 54.2)
            excellent_count = sum(1 for r in all_results if r['test_accuracy'] >= 65.0)

            print(f"\n🎊 ULTRA AGGRESSIVE SUMMARY:")
            print(f"   🏆 ULTIMATE CHAMPION: {best['name']} ({best['test_accuracy']:.1f}%)")
            print(f"   🚀 Beat 54.2% baseline: {breakthrough_count}/{len(all_results)} models")
            print(f"   🎉 Achieved 65%+: {excellent_count} models")

            if best['test_accuracy'] >= 75.0:
                print(f"   🎉 MISSION ACCOMPLISHED! Achieved 75%+ accuracy!")
            elif best['test_accuracy'] >= 65.0:
                print(f"   🎊 EXCELLENT! Found 65%+ architecture!")
            elif best['test_accuracy'] > 54.2:
                improvement = best['test_accuracy'] - 54.2
                print(f"   🚀 SUCCESS! Improved by +{improvement:.1f}% over baseline!")

        print("="*80)
        return all_results

def run_ultra_aggressive_pipeline():
    """Execute the whole experiment: load, split, balance, then sweep all architectures.

    Returns:
        ``(results, label_mapping)`` - the list produced by
        ``UltraAggressiveTrainer.run_ultra_aggressive_test`` and the
        age -> class-index mapping created by the split step.
    """
    divider = "="*80
    print("🔥 ULTRA AGGRESSIVE DEER AGING PIPELINE")
    print(divider)
    print("🎯 GOAL: Train each architecture to its MAXIMUM potential")
    print("🎯 METHOD: 60-100 epochs, multiple strategies, high patience")
    print(divider)

    # Data preparation: raw images -> three-way split -> balanced training set.
    raw_images, raw_ages = load_original_data()
    split = create_train_val_test_split(raw_images, raw_ages)
    X_train, y_train, X_val, y_val, X_test, y_test, label_mapping, reverse_mapping = split

    class_total = len(label_mapping)
    X_balanced, y_balanced = balance_and_augment_data(
        X_train, y_train, augment_multiplier=30, num_classes=class_total
    )

    # Architecture sweep; validation and test data are passed through untouched.
    trainer = UltraAggressiveTrainer(num_classes=len(label_mapping))
    sweep_results = trainer.run_ultra_aggressive_test(
        X_balanced, y_balanced, X_val, y_val, X_test, y_test
    )

    return sweep_results, label_mapping

# Entry point of the notebook cell: announce the run, then execute the full
# pipeline and keep its results in module-level names for later cells.
print(
    "🔥 LAUNCHING ULTRA AGGRESSIVE PIPELINE...",
    "⚠️  This will train each architecture to its MAXIMUM potential - expect 8-12 hours!",
    "💪 But you wanted proper training time - here it is!",
    sep="\n",
)
ultra_results, final_label_mapping = run_ultra_aggressive_pipeline()


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "C:\Users\aaron\AppData\Local\Programs\Python\Python311\Lib\runpy.py", line 198, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\aaron\AppData\Local\Programs\Python\Python311\Lib\runpy.py", line 88, in _run_code
    exec(code, run_globals)
  File "G:\Dropbox\AI Projects\buck\buck-env\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "G:\Dropbox\AI Projects\buck\buck-env\Lib\site-packages\traitlets\config\application.py", line 10