## What changed?

This notebook takes the output result of `250813_nda_all` and attempts to optimize a single model instead of an ensemble.

In [1]:
# Check if CUDA
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
print(f"GPU count: {torch.cuda.device_count()}")

if torch.cuda.is_available():
    print(f"GPU name: {torch.cuda.get_device_name(0)}")
    print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
else:
    print("❌ CUDA not detected by PyTorch")

PyTorch version: 2.5.1+cu121
CUDA available: True
CUDA version: 12.1
GPU count: 1
GPU name: NVIDIA GeForce RTX 5090
GPU memory: 31.8 GB


NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_61 sm_70 sm_75 sm_80 sm_86 sm_90.
If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



### Feeding different data folds to model

- Trying to recapture  ✓ Val: 84.2%, Test: 81.2%

In [2]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.models as models
import numpy as np
import cv2
import random
import json
import os
import glob
import itertools
from datetime import datetime
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# Fixed hyperparameters from combination 23 - RTX 5090 optimized
FIXED_HYPERPARAMS = {
    'backbone_lr': 0.0001,
    'classifier_lr': 0.0005,
    'batch_size': 64,  # Increased from 12 to 64 for RTX 5090
    'optimizer': 'adamw',
    'dropout': 0.4,
    'freeze_layers': 4
}

# Missing hyperparameters to test
MISSING_HYPERPARAMS = {
    'weight_decay': [0.05, 0.08],
    'scheduler': ['cosine', 'plateau'],
    'label_smoothing': [0.15, 0.2],
    'augmentation_strength': ['medium', 'heavy']
}

IMAGE_SIZE = (384, 384)  # Increased from 224x224 to utilize more GPU power
AUGMENTATION_TARGET = 2000  # Increased from 1000
NUM_FOLDS = 25  # Increased from 20
NUM_WORKERS = 8  # Added for faster data loading

def detect_and_convert_image(image):
    if len(image.shape) == 2:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    elif len(image.shape) == 3:
        if image.shape[2] == 1:
            return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        elif image.shape[2] == 3:
            return image
        elif image.shape[2] == 4:
            return cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
    return image

def load_combined_data():
    color_path = "G:\\Dropbox\\AI Projects\\buck\\images\\squared\\color\\*_NDA.png"
    gray_path = "G:\\Dropbox\\AI Projects\\buck\\images\\squared\\grayscale\\*_NDA.png"
    
    images = []
    ages = []
    sources = []
    
    print("Loading color images...")
    color_files = glob.glob(color_path)
    for img_path in color_files:
        try:
            img = cv2.imread(img_path)
            if img is None:
                continue
            
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = detect_and_convert_image(img)
            img_resized = cv2.resize(img, IMAGE_SIZE[::-1])
            
            filename = os.path.basename(img_path)
            filename_no_ext = os.path.splitext(filename)[0]
            parts = filename_no_ext.split('_')
            
            if len(parts) < 5:
                continue
            
            age_part = parts[3]
            if 'xpx' in age_part.lower() or 'p' not in age_part:
                continue
            
            try:
                age_value = float(age_part.replace('p', '.'))
                images.append(img_resized)
                ages.append(age_value)
                sources.append('color')
            except ValueError:
                continue
                
        except Exception as e:
            continue
    
    print(f"Loaded {len([s for s in sources if s == 'color'])} color images")
    
    print("Loading grayscale images...")
    gray_files = glob.glob(gray_path)
    for img_path in gray_files:
        try:
            img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
            if img is None:
                continue
            
            img = detect_and_convert_image(img)
            img_resized = cv2.resize(img, IMAGE_SIZE[::-1])
            
            filename = os.path.basename(img_path)
            filename_no_ext = os.path.splitext(filename)[0]
            parts = filename_no_ext.split('_')
            
            if len(parts) < 5:
                continue
            
            age_part = parts[3]
            if 'xpx' in age_part.lower() or 'p' not in age_part:
                continue
            
            try:
                age_value = float(age_part.replace('p', '.'))
                images.append(img_resized)
                ages.append(age_value)
                sources.append('grayscale')
            except ValueError:
                continue
                
        except Exception as e:
            continue
    
    print(f"Loaded {len([s for s in sources if s == 'grayscale'])} grayscale images")
    print(f"Total images: {len(images)}")
    
    ages_grouped = [5.5 if age >= 5.5 else age for age in ages]
    
    age_counts = Counter(ages_grouped)
    valid_ages = {age for age, count in age_counts.items() if count >= 3}
    
    filtered_images = []
    filtered_ages = []
    filtered_sources = []
    
    for img, age, source in zip(images, ages_grouped, sources):
        if age in valid_ages:
            filtered_images.append(img)
            filtered_ages.append(age)
            filtered_sources.append(source)
    
    print(f"Final dataset: {len(filtered_images)} images")
    print(f"Age distribution: {dict(Counter(filtered_ages))}")
    
    return np.array(filtered_images), filtered_ages, filtered_sources

def enhanced_augment_image(image, strength='medium'):
    if image.dtype != np.uint8:
        image = image.astype(np.uint8)
    
    if strength == 'light':
        rot_prob, flip_prob, bright_prob, gamma_prob, noise_prob = 0.5, 0.3, 0.6, 0.2, 0.1
        rot_range, bright_range = 10, (0.8, 1.2)
    elif strength == 'medium':
        rot_prob, flip_prob, bright_prob, gamma_prob, noise_prob = 0.7, 0.5, 0.8, 0.4, 0.3
        rot_range, bright_range = 15, (0.7, 1.3)
    else:  # heavy
        rot_prob, flip_prob, bright_prob, gamma_prob, noise_prob = 0.8, 0.6, 0.9, 0.5, 0.4
        rot_range, bright_range = 20, (0.6, 1.4)
    
    if random.random() < rot_prob:
        angle = random.uniform(-rot_range, rot_range)
        h, w = image.shape[:2]
        M = cv2.getRotationMatrix2D((w//2, h//2), angle, 1.0)
        image = cv2.warpAffine(image, M, (w, h))
    
    if random.random() < flip_prob:
        image = cv2.flip(image, 1)
    
    if len(image.shape) == 3 and image.shape[2] == 3 and random.random() < 0.4:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        image = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
    
    if random.random() < bright_prob:
        alpha = random.uniform(*bright_range)
        beta = random.randint(-25, 25)
        image = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
    
    if random.random() < gamma_prob:
        gamma = random.uniform(0.8, 1.2)
        inv_gamma = 1.0 / gamma
        table = np.array([((i / 255.0) ** inv_gamma) * 255 for i in np.arange(0, 256)]).astype("uint8")
        image = cv2.LUT(image, table)
    
    if random.random() < noise_prob:
        noise = np.random.normal(0, 7, image.shape).astype(np.int16)
        image_int16 = image.astype(np.int16)
        noisy_image = np.clip(image_int16 + noise, 0, 255)
        image = noisy_image.astype(np.uint8)
    
    return image

def create_balanced_dataset(X, y, aug_strength='medium', target_count=2000):
    class_counts = Counter(y)
    max_count = max(class_counts.values())
    final_target = max(target_count, max_count)
    
    X_balanced = []
    y_balanced = []
    
    unique_classes = sorted(list(set(y)))
    
    for class_idx in unique_classes:
        class_mask = np.array(y) == class_idx
        class_images = X[class_mask]
        current_count = len(class_images)
        
        if current_count == 0:
            continue
        
        X_balanced.extend(class_images)
        y_balanced.extend([class_idx] * current_count)
        
        needed = final_target - current_count
        for i in range(needed):
            orig_idx = random.randint(0, current_count - 1)
            aug_img = enhanced_augment_image(class_images[orig_idx].copy(), aug_strength)
            X_balanced.append(aug_img)
            y_balanced.append(class_idx)
    
    final_counts = Counter(y_balanced)
    print(f"Final class distribution: {dict(final_counts)}")
    assert len(set(final_counts.values())) == 1, "Classes are not perfectly balanced!"
    
    return np.array(X_balanced), np.array(y_balanced)

class DeerDataset(Dataset):
    def __init__(self, X, y, training=True):
        self.X = torch.FloatTensor(X if isinstance(X, np.ndarray) else np.array(X))
        self.y = torch.LongTensor(y if isinstance(y, np.ndarray) else np.array(y))
        self.training = training
        self.mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
        self.std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        image = self.X[idx].clone()
        label = self.y[idx].clone()
        
        if image.max() > 1.0:
            image = image / 255.0
        
        if len(image.shape) == 3 and image.shape[-1] == 3:
            image = image.permute(2, 0, 1)
        
        if not self.training and random.random() < 0.5:
            image = torch.flip(image, [2])
        
        image = (image - self.mean) / self.std
        return image, label

class GhostNetCombo23:
    def __init__(self, num_classes, save_dir=None):
        self.num_classes = num_classes
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        if save_dir is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            self.save_dir = f"ghostnet_rtx5090_{timestamp}"
        else:
            self.save_dir = save_dir
        
        os.makedirs(self.save_dir, exist_ok=True)
        self.best_multiplicative_score = 0.0  # Changed from composite score
        
        print(f"Using device: {self.device}")
        if torch.cuda.is_available():
            print(f"GPU: {torch.cuda.get_device_name()}")
            print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.deterministic = False  # Allow non-deterministic for speed
    
    def create_ghostnet_model(self, dropout=0.3, freeze_layers=4):
        # Use EfficientNet-B2 instead of B0 to utilize more GPU power
        model = models.efficientnet_b2(pretrained=True)
        
        # Freeze early layers
        layers_to_freeze = list(model.features.children())[:freeze_layers]
        for layer in layers_to_freeze:
            for param in layer.parameters():
                param.requires_grad = False
        
        # Replace classifier
        model.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(model.classifier[1].in_features, self.num_classes)
        )
        
        return model.to(self.device)
    
    def get_optimizer(self, model, opt_type, backbone_lr, classifier_lr, weight_decay):
        backbone_params = []
        classifier_params = []
        
        for name, param in model.named_parameters():
            if param.requires_grad:
                if 'classifier' in name:
                    classifier_params.append(param)
                else:
                    backbone_params.append(param)
        
        param_groups = [
            {'params': backbone_params, 'lr': backbone_lr},
            {'params': classifier_params, 'lr': classifier_lr}
        ]
        
        if opt_type == 'adamw':
            return optim.AdamW(param_groups, weight_decay=weight_decay)
        elif opt_type == 'sgd':
            return optim.SGD(param_groups, weight_decay=weight_decay, momentum=0.9)
        else:
            raise ValueError(f"Unknown optimizer: {opt_type}")
    
    def get_scheduler(self, optimizer, scheduler_type, max_epochs):
        if scheduler_type == 'cosine':
            return optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max_epochs, eta_min=1e-6)
        elif scheduler_type == 'plateau':
            return optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10, factor=0.5, verbose=False)
        else:
            raise ValueError(f"Unknown scheduler: {scheduler_type}")
    
    def train_with_hyperparams(self, train_loader, val_loader, test_loader, hyperparams, fold_num):
        model = self.create_ghostnet_model(
            dropout=hyperparams['dropout'], 
            freeze_layers=hyperparams['freeze_layers']
        )
        
        optimizer = self.get_optimizer(
            model, hyperparams['optimizer'], 
            hyperparams['backbone_lr'], hyperparams['classifier_lr'], 
            hyperparams['weight_decay']
        )
        
        scheduler = self.get_scheduler(optimizer, hyperparams['scheduler'], 100)  # Increased epochs
        criterion = nn.CrossEntropyLoss(label_smoothing=hyperparams['label_smoothing'])
        
        best_val_acc = 0.0
        patience = 35  # Increased patience
        patience_counter = 0
        best_state = None
        
        for epoch in range(100):  # Increased from 80
            model.train()
            train_correct = 0
            train_total = 0
            
            for images, labels in train_loader:
                images, labels = images.to(self.device, non_blocking=True), labels.to(self.device, non_blocking=True)
                optimizer.zero_grad()
                
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                
                _, predicted = torch.max(outputs, 1)
                train_total += labels.size(0)
                train_correct += (predicted == labels).sum().item()
            
            model.eval()
            val_correct = 0
            val_total = 0
            
            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(self.device, non_blocking=True), labels.to(self.device, non_blocking=True)
                    outputs = model(images)
                    
                    _, predicted = torch.max(outputs, 1)
                    val_total += labels.size(0)
                    val_correct += (predicted == labels).sum().item()
            
            train_acc = 100 * train_correct / train_total
            val_acc = 100 * val_correct / val_total
            
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                patience_counter = 0
                best_state = model.state_dict().copy()
            else:
                patience_counter += 1
            
            if hyperparams['scheduler'] == 'plateau':
                scheduler.step(val_acc)
            else:
                scheduler.step()
            
            if epoch % 25 == 0 and epoch > 0:
                print(f"    Epoch {epoch}: Train {train_acc:.1f}%, Val {val_acc:.1f}%")
            
            if patience_counter >= patience:
                print(f"    Early stopping at epoch {epoch} (no improvement)")
                break
            
            if epoch % 10 == 0:
                torch.cuda.empty_cache()
        
        if best_state is not None:
            model.load_state_dict(best_state)
        
        model.eval()
        test_correct = 0
        test_total = 0
        
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(self.device, non_blocking=True), labels.to(self.device, non_blocking=True)
                
                outputs1 = model(images)
                flipped = torch.flip(images, [3])
                outputs2 = model(flipped)
                outputs = (outputs1 + outputs2) / 2
                
                _, predicted = torch.max(outputs, 1)
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()
        
        test_acc = 100 * test_correct / test_total
        
        # NEW SUCCESS METRIC: val_acc * test_acc
        multiplicative_score = (best_val_acc / 100) * (test_acc / 100) * 10000  # Scale for readability
        
        # Only save if this is a new global best
        if multiplicative_score > self.best_multiplicative_score:
            self.best_multiplicative_score = multiplicative_score
            
            save_path = os.path.join(self.save_dir, f"best_model_mult_{multiplicative_score:.1f}_val_{best_val_acc:.1f}_test_{test_acc:.1f}_fold_{fold_num}.pth")
            torch.save({
                'model_state_dict': model.state_dict(),
                'hyperparams': hyperparams,
                'val_accuracy': best_val_acc,
                'test_accuracy': test_acc,
                'train_accuracy': train_acc,
                'multiplicative_score': multiplicative_score,
                'fold_number': fold_num
            }, save_path)
            print(f"    NEW GLOBAL BEST! Mult Score: {multiplicative_score:.1f}, Val: {best_val_acc:.1f}%, Test: {test_acc:.1f}% (Fold {fold_num})")
            return True
            
        return False
    
    def run_fold_search(self, images, ages, sources):
        print(f"Starting RTX 5090 optimized training with {NUM_FOLDS} folds...")
        print(f"Success metric: (val_acc * test_acc) - only saving global bests")
        
        unique_ages = sorted(list(set(ages)))
        label_mapping = {age: i for i, age in enumerate(unique_ages)}
        y_indices = np.array([label_mapping[age] for age in ages])
        
        print(f"Classes: {len(unique_ages)}")
        print(f"Label mapping: {label_mapping}")
        
        missing_keys = list(MISSING_HYPERPARAMS.keys())
        missing_values = list(MISSING_HYPERPARAMS.values())
        missing_combos = list(itertools.product(*missing_values))
        
        results = []
        best_found = False
        
        for combo_idx, missing_combo in enumerate(missing_combos, 1):
            hyperparams = FIXED_HYPERPARAMS.copy()
            for key, value in zip(missing_keys, missing_combo):
                hyperparams[key] = value
            
            print(f"\n[Hyperparams {combo_idx}/{len(missing_combos)}] Testing:")
            print(f"  Fixed: adamw, lr={hyperparams['backbone_lr']}/{hyperparams['classifier_lr']}, batch={hyperparams['batch_size']}")
            print(f"  Variable: wd={hyperparams['weight_decay']}, sched={hyperparams['scheduler']}, smooth={hyperparams['label_smoothing']}, aug={hyperparams['augmentation_strength']}")
            
            fold_results = []
            
            for fold in range(1, NUM_FOLDS + 1):
                random.seed(fold * 42)
                np.random.seed(fold * 42)
                torch.manual_seed(fold * 42)
                
                print(f"\n  [Fold {fold:2d}/{NUM_FOLDS}]", end=" ")
                
                try:
                    X_train, X_test, y_train, y_test = train_test_split(
                        images, y_indices, test_size=0.2, random_state=fold * 42, stratify=y_indices
                    )
                    
                    X_train_final, X_val, y_train_final, y_val = train_test_split(
                        X_train, y_train, test_size=0.2, random_state=fold * 42 + 1, stratify=y_train
                    )
                    
                    X_train_aug, y_train_aug = create_balanced_dataset(
                        X_train_final, y_train_final, hyperparams['augmentation_strength'], AUGMENTATION_TARGET
                    )
                    
                    train_dataset = DeerDataset(X_train_aug, y_train_aug, training=True)
                    val_dataset = DeerDataset(X_val, y_val, training=False)
                    test_dataset = DeerDataset(X_test, y_test, training=False)
                    
                    train_loader = DataLoader(train_dataset, batch_size=hyperparams['batch_size'], 
                                            shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
                    val_loader = DataLoader(val_dataset, batch_size=hyperparams['batch_size'], 
                                          shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
                    test_loader = DataLoader(test_dataset, batch_size=hyperparams['batch_size'], 
                                           shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
                    
                    was_best = self.train_with_hyperparams(
                        train_loader, val_loader, test_loader, hyperparams, fold
                    )
                    
                    if was_best:
                        best_found = True
                    
                    torch.cuda.empty_cache()
                    
                except Exception as e:
                    print(f"FAILED: {str(e)}")
                    continue
        
        print(f"\n{'='*60}")
        print(f"RTX 5090 OPTIMIZED TRAINING COMPLETE")
        print(f"{'='*60}")
        print(f"Best Multiplicative Score: {self.best_multiplicative_score:.1f}")
        print(f"Success Metric: (validation_accuracy * test_accuracy)")
        print(f"Only global best models were saved")
        print(f"Results saved to: {self.save_dir}")
        
        return results

def main():
    print("RTX 5090 Optimized Deer Age Model Training")
    print("=" * 60)
    
    start_time = time.time()
    
    images, ages, sources = load_combined_data()
    
    searcher = GhostNetCombo23(num_classes=len(set(ages)))
    
    results = searcher.run_fold_search(images, ages, sources)
    
    elapsed = (time.time() - start_time) / 60
    print(f"\nTotal Time: {elapsed:.1f} minutes")

if __name__ == "__main__":
    main()

RTX 5090 Optimized Deer Age Model Training
Loading color images...
Loaded 0 color images
Loading grayscale images...
Loaded 0 grayscale images
Total images: 0
Final dataset: 0 images
Age distribution: {}
Using device: cuda
GPU: NVIDIA GeForce RTX 5090
GPU Memory: 34.2 GB
Starting RTX 5090 optimized training with 25 folds...
Success metric: (val_acc * test_acc) - only saving global bests
Classes: 0
Label mapping: {}

[Hyperparams 1/16] Testing:
  Fixed: adamw, lr=0.0001/0.0005, batch=64
  Variable: wd=0.05, sched=cosine, smooth=0.15, aug=medium

  [Fold  1/25] FAILED: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

  [Fold  2/25] FAILED: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

  [Fold  3/25] FAILED: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any o