## What changed?

This notebook takes the output result of `250813_nda_all` and attempts to optimize a single model instead of an ensemble.

In [1]:
# Check if CUDA

import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
print(f"GPU count: {torch.cuda.device_count()}")

if torch.cuda.is_available():
    print(f"GPU name: {torch.cuda.get_device_name(0)}")
    print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
else:
    print("❌ CUDA not detected by PyTorch")

PyTorch version: 2.5.1+cu121
CUDA available: True
CUDA version: 12.1
GPU count: 1
GPU name: NVIDIA GeForce RTX 2060
GPU memory: 6.0 GB


In [2]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import timm
import numpy as np
import cv2
import random
import json
import os
import glob
from datetime import datetime
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

OPTIMIZATION_CONFIGS = [
    {
        'name': 'baseline_deep',
        'lr_backbone': 0.0003,
        'lr_head': 0.001,
        'weight_decay': 0.03,
        'dropout': 0.3,
        'label_smoothing': 0.1,
        'augmentation_strength': 'medium',
        'scheduler': 'cosine',
        'warmup_epochs': 8,
        'freeze_layers': 3,
        'head_architecture': 'simple',
        'batch_size': 16
    },
    {
        'name': 'aggressive_lr',
        'lr_backbone': 0.0008,
        'lr_head': 0.003,
        'weight_decay': 0.05,
        'dropout': 0.4,
        'label_smoothing': 0.15,
        'augmentation_strength': 'strong',
        'scheduler': 'cosine',
        'warmup_epochs': 12,
        'freeze_layers': 5,
        'head_architecture': 'complex',
        'batch_size': 12
    },
    {
        'name': 'conservative_deep',
        'lr_backbone': 0.0001,
        'lr_head': 0.0005,
        'weight_decay': 0.01,
        'dropout': 0.2,
        'label_smoothing': 0.05,
        'augmentation_strength': 'light',
        'scheduler': 'cosine',
        'warmup_epochs': 5,
        'freeze_layers': 1,
        'head_architecture': 'simple',
        'batch_size': 20
    },
    {
        'name': 'step_balanced',
        'lr_backbone': 0.0004,
        'lr_head': 0.0012,
        'weight_decay': 0.04,
        'dropout': 0.35,
        'label_smoothing': 0.12,
        'augmentation_strength': 'medium',
        'scheduler': 'step',
        'warmup_epochs': 6,
        'freeze_layers': 3,
        'head_architecture': 'adaptive',
        'batch_size': 16
    },
    {
        'name': 'heavy_reg',
        'lr_backbone': 0.0002,
        'lr_head': 0.0008,
        'weight_decay': 0.08,
        'dropout': 0.5,
        'label_smoothing': 0.2,
        'augmentation_strength': 'strong',
        'scheduler': 'cosine',
        'warmup_epochs': 10,
        'freeze_layers': 0,
        'head_architecture': 'complex',
        'batch_size': 14
    },
    {
        'name': 'minimal_freeze_high_lr',
        'lr_backbone': 0.0006,
        'lr_head': 0.002,
        'weight_decay': 0.02,
        'dropout': 0.25,
        'label_smoothing': 0.08,
        'augmentation_strength': 'medium',
        'scheduler': 'cosine',
        'warmup_epochs': 8,
        'freeze_layers': 1,
        'head_architecture': 'adaptive',
        'batch_size': 18
    },
    {
        'name': 'cyclical_advanced',
        'lr_backbone': 0.0005,
        'lr_head': 0.0015,
        'weight_decay': 0.06,
        'dropout': 0.45,
        'label_smoothing': 0.18,
        'augmentation_strength': 'strong',
        'scheduler': 'cyclical',
        'warmup_epochs': 7,
        'freeze_layers': 4,
        'head_architecture': 'complex',
        'batch_size': 14
    },
    {
        'name': 'extreme_reg',
        'lr_backbone': 0.00015,
        'lr_head': 0.0006,
        'weight_decay': 0.12,
        'dropout': 0.6,
        'label_smoothing': 0.25,
        'augmentation_strength': 'strong',
        'scheduler': 'cosine',
        'warmup_epochs': 15,
        'freeze_layers': 0,
        'head_architecture': 'complex',
        'batch_size': 12
    }
]

AUGMENTATION_TARGET = 400
NUM_FOLDS = 5
IMAGE_SIZE = (224, 224)
MAX_EPOCHS = 60
PATIENCE = 12

class AdvancedAugmentationDataset(Dataset):
    def __init__(self, X, y, augmentation_strength='medium', test_time_aug=False):
        self.X = torch.FloatTensor(X if isinstance(X, np.ndarray) else np.array(X))
        self.y = torch.LongTensor(y if isinstance(y, np.ndarray) else np.array(y))
        self.test_time_aug = test_time_aug
        self.augmentation_strength = augmentation_strength
        self.mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
        self.std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    
    def advanced_augment(self, image):
        is_grayscale = torch.allclose(image[0], image[1]) and torch.allclose(image[1], image[2])
        
        if self.augmentation_strength == 'light':
            if random.random() < 0.5:
                image = torch.flip(image, [2])
            if random.random() < 0.3:
                shift = random.randint(-5, 5)
                if shift != 0:
                    image = torch.roll(image, shift, dims=2)
            if is_grayscale and random.random() < 0.3:
                image = self.apply_false_color(image[0])
            elif not is_grayscale and random.random() < 0.1:
                gray = 0.299 * image[0] + 0.587 * image[1] + 0.114 * image[2]
                image = gray.unsqueeze(0).repeat(3, 1, 1)
        
        elif self.augmentation_strength == 'medium':
            if random.random() < 0.5:
                image = torch.flip(image, [2])
            if random.random() < 0.4:
                factor = random.uniform(0.85, 1.15) if is_grayscale else random.uniform(0.8, 1.2)
                image = torch.clamp(image * factor, 0, 1)
            if random.random() < 0.2:
                angle = random.uniform(-10, 10)
                image = self.rotate_tensor(image, angle)
            if is_grayscale and random.random() < 0.4:
                image = self.apply_false_color(image[0])
            elif not is_grayscale and random.random() < 0.15:
                gray = 0.299 * image[0] + 0.587 * image[1] + 0.114 * image[2]
                image = gray.unsqueeze(0).repeat(3, 1, 1)
        
        elif self.augmentation_strength == 'strong':
            if random.random() < 0.6:
                image = torch.flip(image, [2])
            if random.random() < 0.5:
                factor = random.uniform(0.8, 1.2) if is_grayscale else random.uniform(0.7, 1.3)
                image = torch.clamp(image * factor, 0, 1)
            if random.random() < 0.3:
                angle = random.uniform(-15, 15)
                image = self.rotate_tensor(image, angle)
            if random.random() < 0.2:
                noise = torch.randn_like(image) * 0.02
                image = torch.clamp(image + noise, 0, 1)
            if random.random() < 0.15:
                image = self.apply_blur(image)
            if is_grayscale and random.random() < 0.5:
                image = self.apply_false_color(image[0])
            elif not is_grayscale and random.random() < 0.2:
                gray = 0.299 * image[0] + 0.587 * image[1] + 0.114 * image[2]
                image = gray.unsqueeze(0).repeat(3, 1, 1)
        
        return image
    
    def rotate_tensor(self, image, angle):
        angle_rad = torch.tensor(angle * 3.14159 / 180.0)
        cos_a = torch.cos(angle_rad)
        sin_a = torch.sin(angle_rad)
        
        rotation_matrix = torch.tensor([
            [cos_a, -sin_a, 0],
            [sin_a, cos_a, 0]
        ], dtype=torch.float32).unsqueeze(0)
        
        grid = F.affine_grid(rotation_matrix, image.unsqueeze(0).size(), align_corners=False)
        rotated = F.grid_sample(image.unsqueeze(0), grid, align_corners=False, mode='bilinear', padding_mode='reflection')
        return rotated.squeeze(0)
    
    def apply_blur(self, image):
        kernel_size = random.choice([3, 5])
        sigma = random.uniform(0.5, 1.5)
        
        channels = image.shape[0]
        kernel_1d = torch.exp(-0.5 * (torch.arange(kernel_size, dtype=torch.float32) - kernel_size // 2) ** 2 / sigma ** 2)
        kernel_1d = kernel_1d / kernel_1d.sum()
        kernel_2d = kernel_1d[:, None] * kernel_1d[None, :]
        kernel = kernel_2d.expand(channels, 1, kernel_size, kernel_size)
        
        padding = kernel_size // 2
        blurred = F.conv2d(image.unsqueeze(0), kernel, groups=channels, padding=padding)
        return blurred.squeeze(0)
    
    def apply_false_color(self, grayscale_channel):
        strategy = random.choice(['brown_deer', 'autumn_forest', 'summer_green', 'winter_muted'])
        
        if strategy == 'brown_deer':
            r = torch.clamp(grayscale_channel * 1.1 + 0.1, 0, 1)
            g = torch.clamp(grayscale_channel * 0.9 + 0.05, 0, 1)
            b = torch.clamp(grayscale_channel * 0.7, 0, 1)
        elif strategy == 'autumn_forest':
            r = torch.clamp(grayscale_channel * 1.2 + 0.15, 0, 1)
            g = torch.clamp(grayscale_channel * 1.0 + 0.08, 0, 1)
            b = torch.clamp(grayscale_channel * 0.6, 0, 1)
        elif strategy == 'summer_green':
            r = torch.clamp(grayscale_channel * 0.8 + 0.1, 0, 1)
            g = torch.clamp(grayscale_channel * 1.1 + 0.1, 0, 1)
            b = torch.clamp(grayscale_channel * 0.8, 0, 1)
        else:
            r = torch.clamp(grayscale_channel * 0.95 + 0.05, 0, 1)
            g = torch.clamp(grayscale_channel * 0.98 + 0.02, 0, 1)
            b = torch.clamp(grayscale_channel * 1.05, 0, 1)
        
        return torch.stack([r, g, b], dim=0)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        image = self.X[idx].clone()
        label = self.y[idx].clone()
        
        if image.max() > 1.0:
            image = image / 255.0
        
        if len(image.shape) == 3 and image.shape[-1] == 3:
            image = image.permute(2, 0, 1)
        
        if image.shape[-2:] != IMAGE_SIZE:
            image = F.interpolate(image.unsqueeze(0), size=IMAGE_SIZE, mode='bilinear', align_corners=False).squeeze(0)
        
        if not self.test_time_aug and self.augmentation_strength != 'none':
            image = self.advanced_augment(image)
        
        if self.test_time_aug and random.random() < 0.5:
            image = torch.flip(image, [2])
        
        image = (image - self.mean) / self.std
        return image, label

class WarmupScheduler:
    def __init__(self, optimizer, warmup_epochs, target_lr, total_epochs):
        self.optimizer = optimizer
        self.warmup_epochs = warmup_epochs
        self.target_lr = target_lr
        self.total_epochs = total_epochs
        self.current_epoch = 0
    
    def step(self):
        if self.current_epoch < self.warmup_epochs:
            lr_scale = (self.current_epoch + 1) / self.warmup_epochs
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = self.target_lr * lr_scale
        self.current_epoch += 1

class GhostNetOptimizer:
    def __init__(self, num_classes, save_dir=None):
        self.num_classes = num_classes
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"Using device: {self.device}")
        
        if save_dir is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            self.save_dir = f"ghostnet_optimization_{timestamp}"
        else:
            self.save_dir = save_dir
        
        os.makedirs(self.save_dir, exist_ok=True)
        
        if torch.cuda.is_available():
            torch.backends.cudnn.benchmark = True
            try:
                from torch.cuda.amp import autocast, GradScaler
                self.scaler = GradScaler()
                self.use_amp = True
                print("Using mixed precision training")
            except ImportError:
                self.use_amp = False
        else:
            self.use_amp = False
    
    def create_optimized_model(self, config):
        model = timm.create_model('ghostnet_100', pretrained=True, num_classes=1000)
        
        in_features = model.classifier.in_features
        head_arch = config.get('head_architecture', 'simple')
        
        if head_arch == 'simple':
            model.classifier = nn.Linear(in_features, self.num_classes)
        elif head_arch == 'complex':
            model.classifier = nn.Sequential(
                nn.Dropout(config['dropout']),
                nn.Linear(in_features, in_features // 2),
                nn.ReLU(inplace=True),
                nn.Dropout(config['dropout'] / 2),
                nn.Linear(in_features // 2, self.num_classes)
            )
        else:
            model.classifier = nn.Sequential(
                nn.AdaptiveAvgPool2d(1),
                nn.Flatten(),
                nn.Dropout(config['dropout']),
                nn.Linear(in_features, in_features // 2),
                nn.ReLU(inplace=True),
                nn.Dropout(config['dropout'] / 2),
                nn.Linear(in_features // 2, in_features // 4),
                nn.ReLU(inplace=True),
                nn.Dropout(config['dropout'] / 4),
                nn.Linear(in_features // 4, self.num_classes)
            )
        
        freeze_layers = config.get('freeze_layers', 3)
        frozen_count = 0
        
        for name, param in model.named_parameters():
            if 'features.' in name and frozen_count < freeze_layers:
                if any(layer in name for layer in ['features.0.', 'features.1.', 'features.2.', 'features.3.', 'features.4.']):
                    param.requires_grad = False
                    frozen_count += 1
        
        model.to(self.device)
        return model
    
    def create_enhanced_augmented_data(self, X_train, y_train):
        class_counts = Counter(y_train)
        target_count = AUGMENTATION_TARGET
        
        X_aug = []
        y_aug = []
        
        for class_idx in range(len(set(y_train))):
            class_mask = y_train == class_idx
            class_images = X_train[class_mask]
            current_count = len(class_images)
            
            if current_count == 0:
                continue
            
            for _ in range(3):
                X_aug.extend(class_images)
                y_aug.extend([class_idx] * current_count)
            
            needed = target_count - (current_count * 3)
            if needed > 0:
                for i in range(needed):
                    orig_idx = random.randint(0, current_count - 1)
                    aug_img = self.enhanced_augment_image(class_images[orig_idx].copy())
                    X_aug.append(aug_img)
                    y_aug.append(class_idx)
        
        return np.array(X_aug), np.array(y_aug)
    
    def enhanced_augment_image(self, image):
        if image.dtype != np.uint8:
            image = image.astype(np.uint8)
        
        is_grayscale = np.allclose(image[:,:,0], image[:,:,1]) and np.allclose(image[:,:,1], image[:,:,2])
        
        if random.random() < 0.8:
            angle = random.uniform(-25, 25)
            h, w = image.shape[:2]
            M = cv2.getRotationMatrix2D((w//2, h//2), angle, 1.0)
            image = cv2.warpAffine(image, M, (w, h))
        
        if random.random() < 0.6:
            image = cv2.flip(image, 1)
        
        if random.random() < 0.9:
            alpha = random.uniform(0.7, 1.3) if is_grayscale else random.uniform(0.6, 1.4)
            beta = random.randint(-20, 20) if is_grayscale else random.randint(-30, 30)
            image = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
        
        if random.random() < 0.5:
            gamma = random.uniform(0.6, 1.4)
            inv_gamma = 1.0 / gamma
            table = np.array([((i / 255.0) ** inv_gamma) * 255 for i in np.arange(0, 256)]).astype("uint8")
            image = cv2.LUT(image, table)
        
        if random.random() < 0.4:
            noise_strength = 10 if is_grayscale else 12
            noise = np.random.normal(0, noise_strength, image.shape).astype(np.int16)
            image_int16 = image.astype(np.int16)
            noisy_image = np.clip(image_int16 + noise, 0, 255)
            image = noisy_image.astype(np.uint8)
        
        if random.random() < 0.3:
            ksize = random.choice([3, 5])
            image = cv2.GaussianBlur(image, (ksize, ksize), 0)
        
        return image
    
    def optimize_ghostnet(self, X_train, y_train, X_test, y_test, label_mapping):
        results = []
        best_config = None
        best_score = 0.0
        
        print(f"Starting optimization with {len(OPTIMIZATION_CONFIGS)} configurations")
        print(f"Using {NUM_FOLDS}-fold cross-validation with {MAX_EPOCHS} max epochs per fold")
        
        test_dataset = AdvancedAugmentationDataset(X_test, y_test, 'none', test_time_aug=True)
        test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)
        
        for config_idx, config in enumerate(OPTIMIZATION_CONFIGS):
            print(f"\n[{config_idx+1}/{len(OPTIMIZATION_CONFIGS)}] Testing: {config['name']}")
            
            skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=42)
            fold_cv_scores = []
            fold_test_scores = []
            
            for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train)):
                print(f"  Fold {fold+1}/{NUM_FOLDS}")
                
                X_train_fold = X_train[train_idx]
                X_val_fold = X_train[val_idx]
                y_train_fold = y_train[train_idx]
                y_val_fold = y_train[val_idx]
                
                X_train_aug, y_train_aug = self.create_enhanced_augmented_data(X_train_fold, y_train_fold)
                
                train_dataset = AdvancedAugmentationDataset(X_train_aug, y_train_aug, config['augmentation_strength'])
                val_dataset = AdvancedAugmentationDataset(X_val_fold, y_val_fold, 'none', test_time_aug=True)
                
                train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, num_workers=4)
                val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
                
                model, val_acc, history = self.train_with_config(train_loader, val_loader, config)
                test_acc, test_f1 = self.evaluate_test_performance(model, test_loader)
                
                fold_cv_scores.append(val_acc)
                fold_test_scores.append(test_acc)
                
                print(f"    CV: {val_acc:.2f}%, Test: {test_acc:.2f}%")
                
                del model, train_loader, val_loader, train_dataset, val_dataset, X_train_aug, y_train_aug
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
            
            cv_mean = np.mean(fold_cv_scores)
            cv_std = np.std(fold_cv_scores)
            test_mean = np.mean(fold_test_scores)
            test_std = np.std(fold_test_scores)
            
            results.append({
                'config_name': config['name'],
                'config': config,
                'cv_mean': cv_mean,
                'cv_std': cv_std,
                'test_mean': test_mean,
                'test_std': test_std,
                'fold_cv_scores': fold_cv_scores,
                'fold_test_scores': fold_test_scores
            })
            
            if test_mean > best_score:
                best_score = test_mean
                best_config = config
                print(f"  NEW BEST: {test_mean:.2f}% test accuracy")
            
            print(f"  Final: CV={cv_mean:.2f}±{cv_std:.2f}%, Test={test_mean:.2f}±{test_std:.2f}%")
        
        self.save_results(results, label_mapping)
        return results, best_config, None
    
    def train_with_config(self, train_loader, val_loader, config):
        model = self.create_optimized_model(config)
        
        criterion = nn.CrossEntropyLoss(label_smoothing=config['label_smoothing'])
        
        backbone_params = []
        classifier_params = []
        
        for name, param in model.named_parameters():
            if param.requires_grad:
                if 'classifier' in name:
                    classifier_params.append(param)
                else:
                    backbone_params.append(param)
        
        optimizer = optim.AdamW([
            {'params': backbone_params, 'lr': config['lr_backbone']},
            {'params': classifier_params, 'lr': config['lr_head']}
        ], weight_decay=config['weight_decay'])
        
        if config['scheduler'] == 'cosine':
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=MAX_EPOCHS)
        elif config['scheduler'] == 'step':
            scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
        else:
            scheduler = optim.lr_scheduler.CyclicLR(optimizer, base_lr=config['lr_backbone']/10, max_lr=config['lr_backbone'], step_size_up=10)
        
        warmup_scheduler = WarmupScheduler(optimizer, config['warmup_epochs'], config['lr_backbone'], MAX_EPOCHS)
        
        best_val_acc = 0.0
        patience_counter = 0
        best_state = None
        
        training_history = {
            'train_accs': [], 'val_accs': [], 'train_losses': [], 'val_losses': []
        }
        
        for epoch in range(MAX_EPOCHS):
            if epoch < config['warmup_epochs']:
                warmup_scheduler.step()
            
            model.train()
            train_correct = 0
            train_total = 0
            train_loss_total = 0.0
            train_batches = 0
            
            for batch_idx, (images, labels) in enumerate(train_loader):
                images, labels = images.to(self.device), labels.to(self.device)
                optimizer.zero_grad()
                
                if self.use_amp:
                    with torch.cuda.amp.autocast():
                        outputs = model(images)
                        loss = criterion(outputs, labels)
                    self.scaler.scale(loss).backward()
                    self.scaler.step(optimizer)
                    self.scaler.update()
                else:
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()
                
                _, predicted = torch.max(outputs, 1)
                train_total += labels.size(0)
                train_correct += (predicted == labels).sum().item()
                train_loss_total += loss.item()
                train_batches += 1
            
            if epoch >= config['warmup_epochs']:
                scheduler.step()
            
            train_acc = 100 * train_correct / train_total
            train_loss = train_loss_total / train_batches
            
            model.eval()
            val_correct = 0
            val_total = 0
            val_loss_total = 0.0
            val_batches = 0
            
            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(self.device), labels.to(self.device)
                    
                    if self.use_amp:
                        with torch.cuda.amp.autocast():
                            outputs = model(images)
                            loss = criterion(outputs, labels)
                    else:
                        outputs = model(images)
                        loss = criterion(outputs, labels)
                    
                    _, predicted = torch.max(outputs, 1)
                    val_total += labels.size(0)
                    val_correct += (predicted == labels).sum().item()
                    val_loss_total += loss.item()
                    val_batches += 1
            
            val_acc = 100 * val_correct / val_total
            val_loss = val_loss_total / val_batches
            
            training_history['train_accs'].append(train_acc)
            training_history['val_accs'].append(val_acc)
            training_history['train_losses'].append(train_loss)
            training_history['val_losses'].append(val_loss)
            
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_counter = 0
                best_state = model.state_dict().copy()
            else:
                patience_counter += 1
            
            if patience_counter >= PATIENCE:
                print(f"    Early stopping at epoch {epoch+1}")
                break
        
        if best_state is not None:
            model.load_state_dict(best_state)
        
        return model, best_val_acc, training_history
    
    def evaluate_test_performance(self, model, test_loader):
        model.eval()
        correct = 0
        total = 0
        all_preds = []
        all_labels = []
        
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(self.device), labels.to(self.device)
                
                if self.use_amp:
                    with torch.cuda.amp.autocast():
                        outputs1 = model(images)
                        flipped = torch.flip(images, [3])
                        outputs2 = model(flipped)
                else:
                    outputs1 = model(images)
                    flipped = torch.flip(images, [3])
                    outputs2 = model(flipped)
                
                outputs = (outputs1 + outputs2) / 2
                
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                
                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        accuracy = 100 * correct / total
        f1_macro = f1_score(all_labels, all_preds, average='macro')
        
        return accuracy, f1_macro
    
    def save_results(self, results, label_mapping):
        results_file = os.path.join(self.save_dir, 'optimization_results.json')
        
        results_data = {
            'timestamp': datetime.now().isoformat(),
            'label_mapping': {str(k): v for k, v in label_mapping.items()},
            'configs_tested': len(results),
            'results': []
        }
        
        for result in results:
            results_data['results'].append({
                'config_name': result['config_name'],
                'config': result['config'],
                'cv_mean': float(result['cv_mean']),
                'cv_std': float(result['cv_std']),
                'test_mean': float(result['test_mean']),
                'test_std': float(result['test_std']),
                'fold_cv_scores': [float(x) for x in result['fold_cv_scores']],
                'fold_test_scores': [float(x) for x in result['fold_test_scores']]
            })
        
        with open(results_file, 'w') as f:
            json.dump(results_data, f, indent=2)

def load_color_and_grayscale_data():
    fpath = "G:\\Dropbox\\AI Projects\\buck\\images\\squared\\**\\*_NDA.png"
    image_paths = glob.glob(fpath, recursive=True)
    
    images = []
    ages = []
    color_count = 0
    grayscale_count = 0
    
    print(f"Loading {len(image_paths)} images...")
    
    for img_path in image_paths:
        try:
            img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
            if img is None:
                continue
            
            original_is_grayscale = len(img.shape) == 2
            
            if len(img.shape) == 2:
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
                grayscale_count += 1
            elif len(img.shape) == 3 and img.shape[2] == 3:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                color_count += 1
            elif len(img.shape) == 3 and img.shape[2] == 4:
                img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
                color_count += 1
            else:
                continue
            
            img_resized = cv2.resize(img, (224, 224))
            
            filename = os.path.basename(img_path)
            filename_no_ext = os.path.splitext(filename)[0]
            parts = filename_no_ext.split('_')
            
            if len(parts) < 5:
                continue
            
            age_part = parts[3]
            if 'xpx' in age_part.lower() or 'p' not in age_part:
                continue
            
            value_str = age_part.replace('p', '.')
            age_value = float(value_str)
            age_value = 5.5 if age_value >= 5.5 else age_value
            
            images.append(img_resized)
            ages.append(age_value)
            
        except Exception as e:
            continue
    
    print(f"Loaded {len(images)} images ({color_count} color, {grayscale_count} grayscale)")
    
    age_counts = Counter(ages)
    valid_ages = {age for age, count in age_counts.items() if count >= 3}
    
    filtered_images = []
    filtered_ages = []
    
    for img, age in zip(images, ages):
        if age in valid_ages:
            filtered_images.append(img)
            filtered_ages.append(age)
    
    return np.array(filtered_images), filtered_ages

def main():
    print("GhostNet-100 Advanced Optimization")
    print("=" * 50)
    
    start_time = time.time()
    
    images, ages = load_color_and_grayscale_data()
    
    unique_ages = sorted(list(set(ages)))
    label_mapping = {age: i for i, age in enumerate(unique_ages)}
    y_indices = np.array([label_mapping[age] for age in ages])
    
    print(f"Dataset: {len(images)} images, {len(unique_ages)} age classes")
    print(f"Age distribution: {dict(Counter(ages))}")
    
    X_train, X_test, y_train, y_test = train_test_split(
        images, y_indices, test_size=0.2, random_state=42, stratify=y_indices
    )
    
    print(f"Training: {len(X_train)}, Test: {len(X_test)}")
    
    optimizer = GhostNetOptimizer(num_classes=len(unique_ages))
    results, best_config, _ = optimizer.optimize_ghostnet(
        X_train, y_train, X_test, y_test, label_mapping
    )
    
    elapsed = (time.time() - start_time) / 60
    best_score = max(result['test_mean'] for result in results)
    
    print("\n" + "=" * 50)
    print("FINAL RESULTS")
    print("=" * 50)
    
    for result in results:
        print(f"{result['config_name']:>20}: "
              f"CV={result['cv_mean']:.2f}±{result['cv_std']:.2f}%, "
              f"Test={result['test_mean']:.2f}±{result['test_std']:.2f}%")
    
    print(f"\nBest: {best_config['name']} - {best_score:.2f}%")
    print(f"Training time: {elapsed:.1f} minutes")
    print(f"Results saved to: {optimizer.save_dir}")

if __name__ == "__main__":
    main()

GhostNet-100 Advanced Optimization
Loading 237 images...
Loaded 236 images (237 color, 0 grayscale)
Dataset: 236 images, 5 age classes
Age distribution: {2.5: 40, 3.5: 50, 4.5: 56, 5.5: 58, 1.5: 32}
Training: 188, Test: 48
Using device: cuda
Using mixed precision training
Starting optimization with 8 configurations
Using 5-fold cross-validation with 60 max epochs per fold

[1/8] Testing: baseline_deep
  Fold 1/5


RuntimeError: DataLoader worker (pid(s) 18728, 11216, 6760, 31408) exited unexpectedly