### Check that the RTX 5090 can run CUDA workloads in PyTorch

In [1]:
import torch
import torchvision.models as models

# Report the PyTorch build and what it can see of the GPU.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
print(f"GPU count: {torch.cuda.device_count()}")

cuda_ok = torch.cuda.is_available()
if cuda_ok:
    print(f"GPU name: {torch.cuda.get_device_name(0)}")
    print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
else:
    print("❌ CUDA not detected by PyTorch")

if cuda_ok:
    test_batch = torch.randn(2, 3, 224, 224).cuda()

    # Test ResNet50 specifically. Model construction lives inside the try so a
    # download or device error is reported instead of aborting the cell.
    try:
        model = models.resnet50(weights='DEFAULT').cuda().eval()
        with torch.no_grad():
            output = model(test_batch)
        # Wait for the queued GPU work so a late kernel failure is caught here.
        torch.cuda.synchronize()
        print("ResNet50 works!")
    except Exception as e:
        print(f"ResNet50 failed: {e}")

    # Test EfficientNet
    try:
        model_eff = models.efficientnet_b0(weights='DEFAULT').cuda().eval()
        with torch.no_grad():
            output_eff = model_eff(test_batch)
        torch.cuda.synchronize()
        print("EfficientNet works!")
    except Exception as e:
        print(f"EfficientNet failed: {e}")
else:
    # Without a CUDA device the .cuda() calls above would raise immediately.
    print("Skipping ResNet50/EfficientNet GPU tests (no CUDA device)")

PyTorch version: 2.10.0.dev20250922+cu128
CUDA available: True
CUDA version: 12.8
GPU count: 1
GPU name: NVIDIA GeForce RTX 5090
GPU memory: 31.8 GB




ResNet50 works!
EfficientNet works!




### Load deer images and train/evaluate the multimodal age classifiers

In [None]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.models as models
import numpy as np
import cv2
import random
import json
import os
import glob
import itertools
import gc
from datetime import datetime
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# ---------------------------------------------------------------------------
# Run-wide settings
# ---------------------------------------------------------------------------
IMAGE_SIZE = (600, 600)        # every loaded image is resized to this
AUGMENTATION_TARGET = 1000     # samples per class exposed by the training dataset
NUM_FOLDS = 10                 # repeated random splits evaluated per model
NUM_WORKERS = 0                # DataLoader worker count (0 = load in the main process)
MIXED_PRECISION = True         # enables autocast / GradScaler in the trainer
COMPILE_MODEL = False          # not referenced anywhere in the visible code

# Candidate backbones: one (torchvision constructor name, batch size, freeze depth)
# row per architecture. Dict order is the order in which models are evaluated.
MODEL_CONFIGS = {
    arch: {'model_fn': getattr(models, arch), 'batch_size': batch, 'freeze_layers': frozen}
    for arch, batch, frozen in (
        ('efficientnet_b0', 128, 3),
        ('efficientnet_b1', 96, 3),
        ('efficientnet_b2', 80, 3),
        ('efficientnet_b3', 64, 3),
        ('efficientnet_b4', 48, 3),
        ('efficientnet_b5', 32, 3),
        ('efficientnet_b6', 24, 3),
        ('efficientnet_b7', 16, 3),
        ('resnet18', 256, 2),
        ('resnet34', 192, 2),
        ('resnet50', 128, 2),
        ('resnet101', 80, 2),
        ('resnet152', 64, 2),
    )
}

# Hyper-parameters shared by every model/fold. The 'optimizer' and 'scheduler'
# entries are descriptive only: the visible trainer code always builds AdamW
# and CosineAnnealingLR without reading them.
TRAINING_CONFIG = dict(
    backbone_lr=0.0001,
    classifier_lr=0.0005,
    optimizer='adamw',
    weight_decay=0.05,
    scheduler='cosine',
    label_smoothing=0.1,
    dropout=0.3,
    max_epochs=80,
    patience=25,
    augmentation_strength='medium',
)

def detect_and_convert_image(image):
    """Convert 1- and 4-channel images to three channels; pass others through.

    * 2-D arrays and ``(H, W, 1)`` arrays go through ``cv2.COLOR_GRAY2RGB``.
    * ``(H, W, 4)`` arrays go through ``cv2.COLOR_BGRA2RGB``.
    * Anything else (including ``(H, W, 3)``) is returned unchanged, as the
      very same object.
    """
    rank = len(image.shape)

    if rank == 2:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    if rank != 3:
        # Neither a plain 2-D image nor an (H, W, C) image: leave untouched.
        return image

    channels = image.shape[2]
    if channels == 1:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    if channels == 4:
        return cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
    return image

def parse_filename(filename):
    """Parse a filename stem of the form ``YYMMDD_YYMMDD_CC_EpE_NDA``.

    The underscore-separated fields are read as: [0] date saved (ignored),
    [1] date taken (``YYMMDD``), [2] state code (first two letters are used),
    [3] age with ``p`` standing in for the decimal point (``3p5`` -> 3.5).
    At least five fields must be present; the fifth and later ones are not
    inspected.

    Args:
        filename: Filename without directory or extension.

    Returns:
        ``{'age': float, 'year': int, 'month': int, 'day': int, 'state': str}``
        where ``year`` is the two-digit year and ``state`` is upper-cased,
        or ``None`` when any field is missing, marked unknown, malformed, or
        the date taken is not a real calendar date.
    """
    parts = filename.split('_')
    if len(parts) < 5:
        return None

    date_taken, state, age_part = parts[1], parts[2], parts[3]

    # Filter out unknown values (an upper-case 'U' anywhere in the field).
    # NOTE(review): this substring test also rejects any state code that
    # contains a 'U' (e.g. "UT") - confirm that is intended.
    if 'U' in date_taken or 'U' in state or 'U' in age_part:
        return None

    # Validate formats
    if len(date_taken) != 6 or not date_taken.isdigit():
        return None
    if len(state) < 2 or not state[:2].isalpha():
        return None
    if 'xpx' in age_part.lower() or 'p' not in age_part:
        return None

    try:
        age = float(age_part.replace('p', '.'))

        # Parse date taken to numeric features
        year = int(date_taken[0:2])
        month = int(date_taken[2:4])
        day = int(date_taken[4:6])

        # Cheap range check first ...
        if month < 1 or month > 12 or day < 1 or day > 31:
            return None
        # ... then a real calendar check so impossible dates such as Feb 31
        # are rejected too (raises ValueError, handled below). The two-digit
        # year is interpreted as 20YY for this check only.
        datetime(2000 + year, month, day)

        return {
            'age': age,
            'year': year,
            'month': month,
            'day': day,
            'state': state[:2].upper()
        }
    except ValueError:
        return None

def _load_image_folder(pattern, read_flag):
    """Read, colour-normalise and resize every usable image matching ``pattern``.

    Files whose name is rejected by ``parse_filename`` are skipped silently
    (that is the dataset filter); files that cannot be read or converted are
    skipped with a printed message.

    Args:
        pattern: Glob pattern for the image files.
        read_flag: ``cv2.imread`` flag (e.g. ``cv2.IMREAD_COLOR``).

    Returns:
        List of ``(image, parsed)`` tuples where ``image`` is a uint8 RGB
        array resized with ``IMAGE_SIZE`` and ``parsed`` is the dict returned
        by ``parse_filename``.
    """
    loaded = []
    # glob order is filesystem-dependent; sorting keeps the dataset order (and
    # therefore the seeded train/val/test splits) stable between runs.
    for img_path in sorted(glob.glob(pattern)):
        stem = os.path.splitext(os.path.basename(img_path))[0]
        parsed = parse_filename(stem)
        if parsed is None:
            continue

        try:
            img = cv2.imread(img_path, read_flag)
            if img is None:
                print(f"  Skipping unreadable image: {img_path}")
                continue

            # IMREAD_UNCHANGED keeps 16-bit depth; rescale so the later uint8
            # cast does not wrap values around.
            if img.dtype == np.uint16:
                img = (img // 257).astype(np.uint8)

            if img.ndim == 3 and img.shape[2] == 3:
                # OpenCV decodes colour as BGR; the rest of the pipeline
                # (including the 4-channel branch below) works in RGB.
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            else:
                img = detect_and_convert_image(img)

            loaded.append((cv2.resize(img, IMAGE_SIZE[::-1]), parsed))
        except Exception as exc:
            # Best-effort loading: report the failure and keep going.
            print(f"  Skipping {img_path}: {exc}")

    return loaded


def load_combined_data(
    color_path="D:\\Dropbox\\AI Projects\\buck\\images\\squared\\color\\*.png",
    gray_path="D:\\Dropbox\\AI Projects\\buck\\images\\squared\\grayscale\\*.png",
):
    """Load the colour and grayscale image sets and build labels and features.

    Steps: load both folders, pool every age >= 5.5 into a single 5.5 class,
    drop classes with fewer than 3 images, label-encode the state code and
    assemble a ``[year, month, day, state_encoded]`` feature row per image.

    Args:
        color_path: Glob pattern for the colour images.
        gray_path: Glob pattern for the grayscale images.

    Returns:
        Tuple ``(images, ages, feature_matrix, state_encoder, sources)``:
        a uint8 array of images, the list of grouped ages, a float32
        feature matrix with 4 columns, the fitted ``LabelEncoder`` and the
        list of ``'color'`` / ``'grayscale'`` source tags.

    Raises:
        RuntimeError: If no image survives loading and filtering.
    """
    records = []  # (image, parsed, source)
    for source, pattern, read_flag in (
        ('color', color_path, cv2.IMREAD_COLOR),
        ('grayscale', gray_path, cv2.IMREAD_UNCHANGED),
    ):
        print(f"Loading {source} images...")
        found = _load_image_folder(pattern, read_flag)
        records.extend((img, parsed, source) for img, parsed in found)
        print(f"Loaded {len(found)} {source} images")

    print(f"Total images: {len(records)}")

    # Group ages: everything at or above 5.5 shares one class.
    ages_grouped = [5.5 if parsed['age'] >= 5.5 else parsed['age'] for _, parsed, _ in records]

    # Filter by minimum class count
    age_counts = Counter(ages_grouped)
    valid_ages = {age for age, count in age_counts.items() if count >= 3}

    filtered_images = []
    filtered_ages = []
    filtered_features = []
    filtered_sources = []
    for (img, parsed, source), age in zip(records, ages_grouped):
        if age not in valid_ages:
            continue
        filtered_images.append(img)
        filtered_ages.append(age)
        filtered_features.append({
            'year': parsed['year'],
            'month': parsed['month'],
            'day': parsed['day'],
            'state': parsed['state']
        })
        filtered_sources.append(source)

    if not filtered_images:
        # Fail here with a clear message instead of a shape error downstream.
        raise RuntimeError(
            f"No usable images found for patterns {color_path!r} and {gray_path!r}"
        )

    print(f"Final dataset: {len(filtered_images)} images")
    print(f"Age distribution: {dict(Counter(filtered_ages))}")

    # Encode states
    states = [f['state'] for f in filtered_features]
    state_encoder = LabelEncoder()
    state_encoded = state_encoder.fit_transform(states)

    print(f"Unique states: {list(state_encoder.classes_)}")

    # Create feature matrix: [year, month, day, state_encoded]
    feature_matrix = np.array([
        [f['year'], f['month'], f['day'], state_enc]
        for f, state_enc in zip(filtered_features, state_encoded)
    ], dtype=np.float32)

    return (np.array(filtered_images, dtype=np.uint8),
            filtered_ages,
            feature_matrix,
            state_encoder,
            filtered_sources)

def enhanced_augment_image(image, strength='medium'):
    """Apply a random chain of augmentations to one image and return it as uint8.

    Each step fires independently with a probability set by ``strength``
    (``'light'``, ``'medium'``; any other value selects the strongest preset):
    rotation about the image centre, horizontal flip, grayscale conversion
    (3-channel images only, fixed probability 0.3), brightness/contrast
    scaling, gamma adjustment via a lookup table, and additive Gaussian noise
    (sigma 5). Random numbers come from both ``random`` and ``np.random``.
    """
    if image.dtype != np.uint8:
        image = image.astype(np.uint8)

    # Per-preset step probabilities, max rotation (degrees) and contrast range.
    if strength == 'light':
        p_rot, p_flip, p_bright, p_gamma, p_noise = 0.5, 0.3, 0.6, 0.2, 0.1
        max_angle, contrast_span = 8, (0.85, 1.15)
    elif strength == 'medium':
        p_rot, p_flip, p_bright, p_gamma, p_noise = 0.7, 0.5, 0.8, 0.4, 0.3
        max_angle, contrast_span = 12, (0.75, 1.25)
    else:
        p_rot, p_flip, p_bright, p_gamma, p_noise = 0.8, 0.6, 0.9, 0.5, 0.4
        max_angle, contrast_span = 18, (0.65, 1.35)

    # Rotation about the (integer) image centre, output size unchanged.
    if random.random() < p_rot:
        theta = random.uniform(-max_angle, max_angle)
        height, width = image.shape[:2]
        rotation = cv2.getRotationMatrix2D((width // 2, height // 2), theta, 1.0)
        image = cv2.warpAffine(image, rotation, (width, height))

    # Horizontal mirror.
    if random.random() < p_flip:
        image = cv2.flip(image, 1)

    # Occasionally drop colour; the random draw only happens for 3-channel input.
    has_three_channels = len(image.shape) == 3 and image.shape[2] == 3
    if has_three_channels and random.random() < 0.3:
        image = cv2.cvtColor(cv2.cvtColor(image, cv2.COLOR_RGB2GRAY), cv2.COLOR_GRAY2RGB)

    # Linear brightness/contrast change: alpha * pixel + beta, saturated to uint8.
    if random.random() < p_bright:
        gain = random.uniform(*contrast_span)
        offset = random.randint(-20, 20)
        image = cv2.convertScaleAbs(image, alpha=gain, beta=offset)

    # Gamma curve applied through a 256-entry lookup table.
    if random.random() < p_gamma:
        exponent = 1.0 / random.uniform(0.85, 1.15)
        lut_values = [((level / 255.0) ** exponent) * 255 for level in np.arange(0, 256)]
        image = cv2.LUT(image, np.array(lut_values).astype("uint8"))

    # Additive Gaussian noise, computed in int16 so the sum can be clipped.
    if random.random() < p_noise:
        jitter = np.random.normal(0, 5, image.shape).astype(np.int16)
        image = np.clip(image.astype(np.int16) + jitter, 0, 255).astype(np.uint8)

    return image

class MultiModalDataset(Dataset):
    """Class-balanced dataset yielding ``(image, feature_vector, label)`` triples.

    The dataset pretends every class has exactly ``target_per_class`` samples:
    index ``i`` belongs to class ``i // target_per_class`` and cycles through
    that class's base images. In training mode, samples beyond the first pass
    over a class's base images are augmented. In evaluation mode each sample
    is mirrored horizontally with probability 0.5.
    """

    def __init__(self, base_images, labels, features, aug_strength='medium', target_per_class=1000, training=True):
        """Index ``base_images`` by class and precompute the normalisation stats.

        Args:
            base_images: Indexable collection of HxWxC images.
            labels: One class label per base image.
            features: Indexable collection of per-image feature vectors
                (each must support ``.copy()`` and ``.astype``).
            aug_strength: Preset name forwarded to ``enhanced_augment_image``.
            target_per_class: Number of samples exposed per class.
            training: Selects training (augment) or evaluation (random flip) mode.
        """
        self.base_images = base_images
        self.features = features
        self.labels = np.array(labels)
        self.aug_strength = aug_strength
        self.target_per_class = target_per_class
        self.training = training

        classes = np.unique(labels)
        # class label -> positions of that class inside base_images
        self.class_to_indices = {cls: np.where(self.labels == cls)[0] for cls in classes}
        self.num_classes = len(classes)
        self.class_list = sorted(classes)
        self.length = self.num_classes * self.target_per_class

        # Per-channel mean/std, shaped to broadcast over a (3, H, W) image.
        # These are the values commonly used with ImageNet-pretrained models.
        channel_shape = (3, 1, 1)
        self.mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(channel_shape)
        self.std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(channel_shape)

        print(f"Dataset: {self.length} samples from {len(base_images)} base images")

    def __len__(self):
        """Return ``num_classes * target_per_class``."""
        return self.length

    def __getitem__(self, idx):
        """Return the normalised image tensor, feature tensor and class label for ``idx``."""
        class_pos, slot = divmod(idx, self.target_per_class)
        target_class = self.class_list[class_pos]
        members = self.class_to_indices[target_class]

        # Cycle through the class's base images when slot exceeds their count.
        base_idx = members[slot % len(members)]
        image = self.base_images[base_idx].copy()
        feature_vec = self.features[base_idx].copy()

        # The first pass over a class is served unaugmented.
        is_repeat = slot >= len(members)
        if self.training and is_repeat:
            image = enhanced_augment_image(image, self.aug_strength)

        image = image.astype(np.float32) / 255.0
        if len(image.shape) == 3:
            image = image.transpose(2, 0, 1)  # HWC -> CHW

        # Evaluation mode: mirror along the last (width) axis half of the time.
        if not self.training and random.random() < 0.5:
            image = np.flip(image, axis=2).copy()

        image = (image - self.mean) / self.std

        image_tensor = torch.from_numpy(image.astype(np.float32))
        feature_tensor = torch.from_numpy(feature_vec.astype(np.float32))
        return (image_tensor, feature_tensor, target_class)

class MultiModalModel(nn.Module):
    """Image backbone plus a small feature MLP, fused by a shared classifier.

    The backbone's own classification head is replaced with ``nn.Identity`` so
    it emits its penultimate feature vector. That vector is concatenated with
    a 16-dimensional embedding of the auxiliary features and passed through a
    three-layer classifier.
    """

    def __init__(self, base_model, num_classes, num_features, model_type='efficientnet', dropout=None):
        """Wrap ``base_model`` and build the fusion layers.

        Args:
            base_model: Backbone exposing its head as ``classifier``, ``fc`` or
                ``head``. It is modified in place (the head becomes Identity).
            num_classes: Number of output logits.
            num_features: Length of the auxiliary feature vector.
            model_type: Stored on the instance; not otherwise used here.
            dropout: Base dropout rate for the classifier. Defaults to
                ``TRAINING_CONFIG['dropout']`` when ``None``.

        Raises:
            ValueError: If the backbone has none of the supported head attributes.
        """
        super().__init__()
        self.base_model = base_model
        self.model_type = model_type

        # Resolve the backbone first so an unsupported model fails early with
        # a clear message instead of an unbound-variable error further down.
        feat_dim = self._strip_head(base_model)

        if dropout is None:
            dropout = TRAINING_CONFIG['dropout']

        # Feature MLP
        self.feature_mlp = nn.Sequential(
            nn.Linear(num_features, 32),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
            nn.Linear(32, 16),
            nn.ReLU(inplace=True)
        )

        # Combined classifier; dropout tapers off towards the output layer.
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(feat_dim + 16, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout * 0.5),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout * 0.25),
            nn.Linear(256, num_classes)
        )

    @staticmethod
    def _strip_head(base_model):
        """Replace the backbone's head with Identity and return its input width."""
        if hasattr(base_model, 'classifier'):
            head = base_model.classifier
            if isinstance(head, nn.Sequential):
                # The last layer of a Sequential head carries the feature width.
                head = head[-1]
            feat_dim = head.in_features
            base_model.classifier = nn.Identity()
            return feat_dim

        for attr in ('fc', 'head'):
            if hasattr(base_model, attr):
                feat_dim = getattr(base_model, attr).in_features
                setattr(base_model, attr, nn.Identity())
                return feat_dim

        raise ValueError(
            f"Unsupported backbone {type(base_model).__name__}: "
            "expected a 'classifier', 'fc' or 'head' attribute"
        )

    def forward(self, image, features):
        """Return class logits for a batch of images and auxiliary features."""
        img_features = self.base_model(image)
        feat_embedding = self.feature_mlp(features)
        combined = torch.cat([img_features, feat_embedding], dim=1)
        return self.classifier(combined)

class MultiModelTrainer:
    """Train and score every backbone in ``MODEL_CONFIGS`` over repeated random splits.

    For each model and each of ``NUM_FOLDS`` seeded stratified splits the
    trainer builds a ``MultiModalModel``, trains it with early stopping on
    validation accuracy, saves a checkpoint and records the accuracies.

    NOTE(review): the "multiplicative score" used to pick the best model in
    ``print_summary`` includes test accuracy, so the reported test numbers are
    not an independent estimate of the selected model's performance.
    """

    def __init__(self, num_classes, num_features, save_dir=None):
        """Pick the device, create the output directory and set up AMP.

        Args:
            num_classes: Number of age classes.
            num_features: Length of the auxiliary feature vector.
            save_dir: Output directory; defaults to ``multimodal_600_<timestamp>``.
        """
        self.num_classes = num_classes
        self.num_features = num_features
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        if save_dir is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            save_dir = f"multimodal_600_{timestamp}"
        self.save_dir = save_dir

        os.makedirs(self.save_dir, exist_ok=True)
        self.results = []

        # Defined on every device so train_model never hits a missing attribute
        # when CUDA is unavailable; mixed precision is only used on CUDA.
        self.use_amp = bool(MIXED_PRECISION) and self.device.type == 'cuda'
        self.scaler = None

        print(f"Using device: {self.device}")
        if torch.cuda.is_available():
            print(f"GPU: {torch.cuda.get_device_name()}")
            print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.allow_tf32 = True
            torch.backends.cuda.matmul.allow_tf32 = True

            if self.use_amp:
                self.scaler = torch.amp.GradScaler('cuda')

    @staticmethod
    def _snapshot_state(model):
        """Return an independent CPU copy of ``model``'s state dict.

        ``state_dict()`` returns references to the live tensors, so a plain
        ``dict.copy()`` would keep changing as training continues.
        """
        return {name: tensor.detach().cpu().clone() for name, tensor in model.state_dict().items()}

    def create_model(self, model_name, model_config):
        """Build a pretrained backbone, freeze its early layers and wrap it.

        Args:
            model_name: Key used for logging.
            model_config: Dict with ``model_fn`` and optionally ``freeze_layers``.

        Returns:
            A ``MultiModalModel`` moved to ``self.device``.

        Raises:
            ValueError: If the backbone has neither ``features`` nor ``fc``.
        """
        print(f"Loading {model_name}...")
        base_model = model_config['model_fn'](weights='DEFAULT')

        freeze_layers = model_config.get('freeze_layers', 2)

        if hasattr(base_model, 'features'):
            # Backbones exposing a `features` container: freeze its first N children.
            layers_to_freeze = list(base_model.features.children())[:freeze_layers]
            model_type = 'efficientnet'
        elif hasattr(base_model, 'fc'):
            # ResNet-style backbones: always freeze the stem, then up to two stages.
            layers_to_freeze = []
            if hasattr(base_model, 'layer1'):
                layers_to_freeze = [base_model.conv1, base_model.bn1]
                if freeze_layers >= 1:
                    layers_to_freeze.append(base_model.layer1)
                if freeze_layers >= 2:
                    layers_to_freeze.append(base_model.layer2)
            model_type = 'resnet'
        else:
            raise ValueError(
                f"Unsupported backbone for {model_name}: expected a 'features' or 'fc' attribute"
            )

        for layer in layers_to_freeze:
            for param in layer.parameters():
                param.requires_grad = False

        model = MultiModalModel(base_model, self.num_classes, self.num_features, model_type)

        frozen_params = sum(p.numel() for p in model.parameters() if not p.requires_grad)
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        print(f"Parameters: {trainable_params:,} trainable, {frozen_params:,} frozen")

        return model.to(self.device)

    def get_optimizer(self, model):
        """Return AdamW with separate learning rates for backbone and head parameters."""
        backbone_params = []
        classifier_params = []

        for name, param in model.named_parameters():
            if param.requires_grad:
                # Fusion classifier and feature MLP train at the higher rate.
                if 'classifier' in name or 'feature_mlp' in name:
                    classifier_params.append(param)
                else:
                    backbone_params.append(param)

        param_groups = [
            {'params': backbone_params, 'lr': TRAINING_CONFIG['backbone_lr']},
            {'params': classifier_params, 'lr': TRAINING_CONFIG['classifier_lr']}
        ]

        return optim.AdamW(param_groups, weight_decay=TRAINING_CONFIG['weight_decay'], fused=True)

    def get_scheduler(self, optimizer):
        """Return a cosine annealing schedule spanning ``max_epochs``."""
        return optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=TRAINING_CONFIG['max_epochs'], eta_min=1e-6
        )

    def _evaluate(self, model, loader, flip_tta=False):
        """Return accuracy (percent) of ``model`` on ``loader``.

        With ``flip_tta`` the logits of each batch are averaged with those of
        its horizontally flipped copy before taking the argmax.
        """
        model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for images, features, labels in loader:
                images = images.to(self.device)
                features = features.to(self.device)
                labels = labels.to(self.device)

                with torch.amp.autocast('cuda', enabled=self.use_amp):
                    outputs = model(images, features)
                    if flip_tta:
                        flipped = model(torch.flip(images, [3]), features)
                        outputs = (outputs + flipped) / 2

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        return 100 * correct / total

    def train_model(self, model, model_name, train_loader, val_loader, test_loader, fold_num):
        """Train one model on one split and save its best checkpoint.

        Early stopping tracks validation accuracy; whenever it improves, the
        weights are snapshotted and the test set is scored with flip TTA.

        Returns:
            Dict with model name, fold, train/val/test accuracies (percent) at
            the best validation epoch, the multiplicative score and save path.
        """
        optimizer = self.get_optimizer(model)
        scheduler = self.get_scheduler(optimizer)
        criterion = nn.CrossEntropyLoss(label_smoothing=TRAINING_CONFIG['label_smoothing'])

        best_val_acc = 0.0
        best_train_acc = 0.0
        best_test_acc = 0.0
        patience_counter = 0
        best_state = None

        for epoch in range(TRAINING_CONFIG['max_epochs']):
            model.train()
            train_correct = 0
            train_total = 0

            for images, features, labels in train_loader:
                images = images.to(self.device)
                features = features.to(self.device)
                labels = labels.to(self.device)

                optimizer.zero_grad()

                if self.scaler is not None:
                    with torch.amp.autocast('cuda'):
                        outputs = model(images, features)
                        loss = criterion(outputs, labels)

                    self.scaler.scale(loss).backward()
                    self.scaler.step(optimizer)
                    self.scaler.update()
                else:
                    outputs = model(images, features)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

                _, predicted = torch.max(outputs, 1)
                train_total += labels.size(0)
                train_correct += (predicted == labels).sum().item()

            train_acc = 100 * train_correct / train_total
            val_acc = self._evaluate(model, val_loader)

            if val_acc > best_val_acc:
                best_val_acc = val_acc
                best_train_acc = train_acc
                patience_counter = 0
                # Independent copy: later optimizer steps must not alter it.
                best_state = self._snapshot_state(model)
                best_test_acc = self._evaluate(model, test_loader, flip_tta=True)
            else:
                patience_counter += 1

            scheduler.step()

            if epoch % 20 == 0:
                print(f"    Epoch {epoch}: Train {train_acc:.1f}%, Val {val_acc:.1f}%")

            if patience_counter >= TRAINING_CONFIG['patience']:
                print(f"    Early stopping at epoch {epoch}")
                break

        # Product of the val and test accuracy fractions, scaled to 0-10000.
        multiplicative_score = (best_val_acc / 100) * (best_test_acc / 100) * 10000

        save_path = os.path.join(self.save_dir, f"{model_name}_fold{fold_num}_val{best_val_acc:.1f}_test{best_test_acc:.1f}_mult{multiplicative_score:.1f}.pth")
        torch.save({
            'model_state_dict': best_state,
            'model_name': model_name,
            'fold_number': fold_num,
            'train_accuracy': best_train_acc,
            'validation_accuracy': best_val_acc,
            'test_accuracy': best_test_acc,
            'multiplicative_score': multiplicative_score,
            'image_size': IMAGE_SIZE,
            'training_config': TRAINING_CONFIG,
            'num_features': self.num_features
        }, save_path)

        result = {
            'model_name': model_name,
            'fold': fold_num,
            'train_acc': best_train_acc,
            'val_acc': best_val_acc,
            'test_acc': best_test_acc,
            'multiplicative_score': multiplicative_score,
            'save_path': save_path
        }

        print(f"    {model_name} Fold {fold_num}: Train {best_train_acc:.1f}%, Val {best_val_acc:.1f}%, Test {best_test_acc:.1f}%, Mult {multiplicative_score:.1f}")

        return result

    def run_comprehensive_evaluation(self, images, ages, features, sources):
        """Train every configured model on every fold, then save and summarise.

        Each fold is a seeded stratified 80/20 train/test split followed by a
        second 80/20 split of the training part into train/validation. A fold
        that raises is reported and skipped. ``sources`` is accepted but not used.
        """
        print(f"MULTIMODAL EVALUATION - {len(MODEL_CONFIGS)} models, {NUM_FOLDS} folds")
        print(f"Image size: {IMAGE_SIZE[0]}x{IMAGE_SIZE[1]}")
        print(f"Feature dimensions: {self.num_features}")

        # Map each distinct age to a contiguous class index.
        unique_ages = sorted(list(set(ages)))
        label_mapping = {age: i for i, age in enumerate(unique_ages)}
        y_indices = np.array([label_mapping[age] for age in ages])

        print(f"Classes: {len(unique_ages)}")

        for model_name, model_config in MODEL_CONFIGS.items():
            print(f"\n{'='*60}")
            print(f"TESTING MODEL: {model_name.upper()}")
            print(f"Batch size: {model_config['batch_size']}")
            print(f"{'='*60}")

            for fold in range(1, NUM_FOLDS + 1):
                print(f"\n[Fold {fold}/{NUM_FOLDS}] {model_name}")

                try:
                    X_train, X_test, y_train, y_test, feat_train, feat_test = train_test_split(
                        images, y_indices, features, test_size=0.2, random_state=fold * 42, stratify=y_indices
                    )

                    X_train_final, X_val, y_train_final, y_val, feat_train_final, feat_val = train_test_split(
                        X_train, y_train, feat_train, test_size=0.2, random_state=fold * 42 + 1, stratify=y_train
                    )

                    train_dataset = MultiModalDataset(X_train_final, y_train_final, feat_train_final,
                                                    TRAINING_CONFIG['augmentation_strength'], AUGMENTATION_TARGET, True)
                    val_dataset = MultiModalDataset(X_val, y_val, feat_val, 'light', 200, False)
                    test_dataset = MultiModalDataset(X_test, y_test, feat_test, 'light', 200, False)

                    batch_size = model_config['batch_size']
                    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=NUM_WORKERS)
                    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=NUM_WORKERS)
                    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=NUM_WORKERS)

                    model = self.create_model(model_name, model_config)
                    result = self.train_model(model, model_name, train_loader, val_loader, test_loader, fold)
                    self.results.append(result)

                    # Release this fold's objects before the next model is built.
                    del model, train_dataset, val_dataset, test_dataset, train_loader, val_loader, test_loader
                    torch.cuda.empty_cache()
                    gc.collect()

                except Exception as e:
                    # Best-effort sweep: report the failure and move on.
                    print(f"    FAILED: {str(e)}")
                    torch.cuda.empty_cache()
                    gc.collect()
                    continue

        self.save_results()
        self.print_summary()

    def save_results(self):
        """Write all fold results to ``comprehensive_results.json`` in the save directory."""
        results_path = os.path.join(self.save_dir, "comprehensive_results.json")
        with open(results_path, 'w') as f:
            json.dump(self.results, f, indent=2)
        print(f"\nResults saved to: {results_path}")

    def print_summary(self):
        """Print per-model mean accuracies and the single best fold by multiplicative score."""
        print(f"\n{'='*80}")
        print("COMPREHENSIVE EVALUATION SUMMARY")
        print(f"{'='*80}")

        model_results = {}
        for result in self.results:
            model_results.setdefault(result['model_name'], []).append(result)

        for model_name, results in model_results.items():
            if not results:
                continue

            avg_train = np.mean([r['train_acc'] for r in results])
            avg_val = np.mean([r['val_acc'] for r in results])
            avg_test = np.mean([r['test_acc'] for r in results])
            avg_mult = np.mean([r['multiplicative_score'] for r in results])

            print(f"{model_name:20} | Train: {avg_train:5.1f}% | Val: {avg_val:5.1f}% | Test: {avg_test:5.1f}% | Mult: {avg_mult:6.1f}")

        if self.results:
            best_result = max(self.results, key=lambda x: x['multiplicative_score'])
            print(f"\nBest model: {best_result['model_name']} (Fold {best_result['fold']})")
            print(f"Multiplicative score: {best_result['multiplicative_score']:.1f}")
            print(f"Val: {best_result['val_acc']:.1f}%, Test: {best_result['test_acc']:.1f}%")

def main(seed=42):
    """Load the dataset, run the full model/fold sweep and report elapsed time.

    Args:
        seed: Seed for ``random``, NumPy and PyTorch so augmentation, weight
            initialisation and shuffling start from a known state. Pass
            ``None`` to leave the generators unseeded. cuDNN benchmarking is
            enabled by the trainer, so GPU runs may still not be bit-identical.
    """
    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

    # These CUDA housekeeping calls are only meaningful with a CUDA device.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.synchronize()

    print("Multimodal Deer Age Prediction")
    print("Images + Date + State Features")
    print("=" * 60)

    start_time = time.time()

    images, ages, features, state_encoder, sources = load_combined_data()

    trainer = MultiModelTrainer(num_classes=len(set(ages)), num_features=features.shape[1])
    trainer.run_comprehensive_evaluation(images, ages, features, sources)

    elapsed = (time.time() - start_time) / 60
    print(f"\nComplete evaluation finished in: {elapsed:.1f} minutes")

if __name__ == "__main__":
    main()

Multimodal Deer Age Prediction
Images + Date + State Features
Loading color images...
Loaded 64 color images
Loading grayscale images...
Loaded 59 grayscale images
Total images: 123
Final dataset: 123 images
Age distribution: {5.5: 36, 4.5: 17, 2.5: 16, 3.5: 43, 1.5: 11}
Unique states: [np.str_('AK'), np.str_('AL'), np.str_('FL'), np.str_('GA'), np.str_('IA'), np.str_('IL'), np.str_('IN'), np.str_('KS'), np.str_('KY'), np.str_('LA'), np.str_('MI'), np.str_('MN'), np.str_('MO'), np.str_('MS'), np.str_('NC'), np.str_('NY'), np.str_('OC'), np.str_('OH'), np.str_('OK'), np.str_('PA'), np.str_('SC'), np.str_('TN'), np.str_('TX'), np.str_('VA'), np.str_('WI')]
Using device: cuda
GPU: NVIDIA GeForce RTX 5090
GPU Memory: 34.2 GB
MULTIMODAL EVALUATION - 13 models, 10 folds
Image size: 600x600
Feature dimensions: 4
Classes: 5

TESTING MODEL: EFFICIENTNET_B0
Batch size: 128

[Fold 1/10] efficientnet_b0
Dataset: 5000 samples from 78 base images
Dataset: 1000 samples from 20 base images
Dataset: 10