In [1]:
!pip install keras
import keras



In [19]:
# Image parsing

import numpy as np
import matplotlib.pyplot as plt
import cv2
from buck.analysis.basics import ingest_images

# Your existing ingestion
fpath = "..\\images\\squared\\*_NDA.png"

def read_images_convert_to_rgb(file_pattern):
    """
    Read images and convert grayscale to RGB
    
    Args:
        file_pattern: Pattern like "..\\images\\squared\\*_NDA.png"
    
    Returns:
        images: List of RGB images (all have 3 channels)
        file_paths: List of file paths
    """
    
    file_paths = glob.glob(file_pattern)
    print(f"Found {len(file_paths)} image files")
    
    images = []
    
    for i, file_path in enumerate(file_paths):
        try:
            # Load image in color
            img = cv2.imread(file_path, cv2.IMREAD_COLOR)
            
            if img is None:
                print(f"Warning: Could not load {file_path}")
                continue
            
            # Convert BGR to RGB
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            
            # Check if it's actually grayscale (all channels identical)
            if np.allclose(img_rgb[:,:,0], img_rgb[:,:,1]) and np.allclose(img_rgb[:,:,1], img_rgb[:,:,2]):
                print(f"Converted grayscale to RGB: {file_path}")
            
            # All images are now RGB regardless of original format
            images.append(img_rgb)
            
        except Exception as e:
            print(f"Error loading {file_path}: {e}")
            continue
    
    print(f"Loaded {len(images)} images, all converted to RGB")
    
    return images, file_paths[:len(images)]

images, paths = read_images_convert_to_rgb(fpath)

Found 225 image files
Converted grayscale to RGB: ..\images\squared\241205_240927_TX_3p5_NDA.png
Converted grayscale to RGB: ..\images\squared\250501_241108_PA_8p5_NDA.png
Loaded 225 images, all converted to RGB


In [23]:
# Extract / combine dates

import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras

def extract_age_from_path(file_path):
    """
    Extract age from file path using your exact working logic
    
    Args:
        file_path: Full path to image file
    
    Returns:
        age: Float age (e.g., 2.5 for "2p5") or None if invalid
    """
    try:
        age_part = file_path.split("\\")[-1].split(".")[0].split("_")[3]
        
        # Skip invalid age parts like "xpx"
        if 'x' in age_part or len(age_part) < 2:
            return None
            
        age_float = float(age_part.replace("p", "."))
        
        # Validate reasonable deer age
        if 0.5 <= age_float <= 15.5:
            return age_float
        else:
            return None
            
    except (IndexError, ValueError):
        return None

def split_images_with_ages(images, file_paths, target_size=(224, 224), test_size=0.2, val_size=0.2, random_state=42):
    """
    Split images into train/test/validation sets and extract ages
    
    Args:
        images: List of RGB images
        file_paths: List of corresponding file paths
        target_size: Resize all images to this size for uniform arrays
        test_size: Fraction for test set (default 0.2 = 20%)
        val_size: Fraction of remaining data for validation (default 0.2 = 20%)
        random_state: Random seed for reproducibility
    
    Returns:
        X_train, X_val, X_test: Image arrays
        y_train, y_val, y_test: Age labels (one-hot encoded)
        label_mapping: Dictionary mapping class indices to ages
    """
    
    print("Extracting ages from filenames...")
    
    # Extract ages and resize images
    ages = []
    resized_images = []
    
    for i, (image, file_path) in enumerate(zip(images, file_paths)):
        try:
            age = extract_age_from_path(file_path)
            
            if age is not None:
                # Resize image to target size for uniform array
                import cv2
                resized_img = cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)
                
                ages.append(age)
                resized_images.append(resized_img)
            else:
                print(f"Skipping invalid age in: {file_path}")
                
        except Exception as e:
            print(f"Error processing {file_path}: {e}")
            continue
    
    print(f"Successfully processed {len(ages)} images")
    
    # Group ages: all ages 5.5 and over become 5.5 (mature deer)
    print("Applying age grouping: ages 5.5+ -> 5.5")
    original_ages = ages.copy()
    ages_grouped = []
    
    for age in ages:
        if age >= 5.5:
            ages_grouped.append(5.5)
        else:
            ages_grouped.append(age)
    
    # Show original vs grouped distribution
    print("Original age distribution:")
    unique_original = sorted(list(set(original_ages)))
    for age in unique_original:
        count = original_ages.count(age)
        print(f"  Age {age}: {count} images")
    
    print("Grouped age distribution:")
    unique_ages = sorted(list(set(ages_grouped)))
    for age in unique_ages:
        count = ages_grouped.count(age)
        print(f"  Age {age}: {count} images")
    
    # Use grouped ages for the rest of the process
    ages = ages_grouped
    
    # Convert to numpy arrays (now all images have same size)
    X = np.array(resized_images)
    y_raw = np.array(ages)
    
    print(f"Image array shape: {X.shape}")
    print(f"Ages array shape: {y_raw.shape}")
    
    # Create label mapping (age -> class index)
    label_mapping = {age: idx for idx, age in enumerate(unique_ages)}
    reverse_mapping = {idx: age for age, idx in label_mapping.items()}
    
    print(f"Label mapping: {label_mapping}")
    
    # Convert ages to class indices
    y_indices = np.array([label_mapping[age] for age in ages])
    
    # Check class counts for stratification
    unique_classes, class_counts = np.unique(y_indices, return_counts=True)
    min_class_count = np.min(class_counts)
    
    print(f"Minimum class count: {min_class_count}")
    
    # If any class has fewer than 2 samples, we can't use stratification
    if min_class_count < 2:
        print("Warning: Some classes have only 1 sample. Cannot use stratified splitting.")
        print("Using random splitting instead.")
        
        # First split: separate test set (no stratification)
        X_temp, X_test, y_temp, y_test = train_test_split(
            X, y_indices, 
            test_size=test_size, 
            random_state=random_state
        )
        
        # Second split: separate train and validation (no stratification)
        val_size_adjusted = val_size / (1 - test_size)
        
        X_train, X_val, y_train, y_val = train_test_split(
            X_temp, y_temp,
            test_size=val_size_adjusted,
            random_state=random_state
        )
        
    else:
        print("Using stratified splitting to maintain class balance.")
        
        # First split: separate test set (with stratification)
        X_temp, X_test, y_temp, y_test = train_test_split(
            X, y_indices, 
            test_size=test_size, 
            random_state=random_state,
            stratify=y_indices
        )
        
        # Second split: separate train and validation (with stratification)
        val_size_adjusted = val_size / (1 - test_size)
        
        X_train, X_val, y_train, y_val = train_test_split(
            X_temp, y_temp,
            test_size=val_size_adjusted,
            random_state=random_state,
            stratify=y_temp
        )
    
    # Convert to one-hot encoding
    num_classes = len(unique_ages)
    y_train_onehot = keras.utils.to_categorical(y_train, num_classes)
    y_val_onehot = keras.utils.to_categorical(y_val, num_classes)
    y_test_onehot = keras.utils.to_categorical(y_test, num_classes)
    
    print(f"\nData split completed:")
    print(f"  Training set: {X_train.shape[0]} images")
    print(f"  Validation set: {X_val.shape[0]} images")
    print(f"  Test set: {X_test.shape[0]} images")
    print(f"  Number of classes: {num_classes}")
    
    # Show class distribution in each set
    print(f"\nClass distribution:")
    for split_name, y_split in [("Train", y_train), ("Val", y_val), ("Test", y_test)]:
        print(f"  {split_name}:")
        for class_idx in range(num_classes):
            count = np.sum(y_split == class_idx)
            age = reverse_mapping[class_idx]
            print(f"    Age {age}: {count} images")
    
    return X_train, X_val, X_test, y_train_onehot, y_val_onehot, y_test_onehot, label_mapping

# Test usage:
X_train, X_val, X_test, y_train, y_val, y_test, mapping = split_images_with_ages(images, paths)

Extracting ages from filenames...
Skipping invalid age in: ..\images\squared\250522_241221_IN_xpx_NDA.png
Successfully processed 224 images
Applying age grouping: ages 5.5+ -> 5.5
Original age distribution:
  Age 1.5: 31 images
  Age 2.5: 39 images
  Age 3.5: 47 images
  Age 4.5: 54 images
  Age 5.5: 41 images
  Age 6.5: 6 images
  Age 8.5: 5 images
  Age 12.5: 1 images
Grouped age distribution:
  Age 1.5: 31 images
  Age 2.5: 39 images
  Age 3.5: 47 images
  Age 4.5: 54 images
  Age 5.5: 53 images
Image array shape: (224, 224, 224, 3)
Ages array shape: (224,)
Label mapping: {1.5: 0, 2.5: 1, 3.5: 2, 4.5: 3, 5.5: 4}
Minimum class count: 31
Using stratified splitting to maintain class balance.

Data split completed:
  Training set: 134 images
  Validation set: 45 images
  Test set: 45 images
  Number of classes: 5

Class distribution:
  Train:
    Age 1.5: 19 images
    Age 2.5: 23 images
    Age 3.5: 29 images
    Age 4.5: 32 images
    Age 5.5: 31 images
  Val:
    Age 1.5: 6 images
  

In [26]:
# Homogenize data

from buck.analysis.basics import homogenize_data

#augment_multiplier = 40
#X_train_pca, y_train_flat, X_test_pca, y_true, label_mapping, num_classes = homogenize_data(Xtr_og, ytr_og, Xte,yte_onehot, l_map, augment_multiplier)
import numpy as np
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import gc

def augment_and_balance_data(X_train, y_train, target_samples_per_class=None, augmentation_factor=5):
    """
    Augment training data and balance classes to have equal representation
    
    Args:
        X_train: Training images array (N, H, W, 3)
        y_train: Training labels (one-hot encoded)
        target_samples_per_class: Target number of samples per class (if None, uses max class count * augmentation_factor)
        augmentation_factor: Multiplier for the largest class to determine target size
    
    Returns:
        X_train_balanced: Balanced and augmented training data
        y_train_balanced: Corresponding balanced labels
    """
    
    print("Starting data augmentation and class balancing...")
    print(f"Input data shape: {X_train.shape}")
    print(f"Input labels shape: {y_train.shape}")
    
    # Convert one-hot to class indices
    y_train_indices = np.argmax(y_train, axis=1)
    num_classes = y_train.shape[1]
    
    # Count samples per class
    unique_classes, class_counts = np.unique(y_train_indices, return_counts=True)
    
    print("Current class distribution:")
    for class_idx, count in zip(unique_classes, class_counts):
        print(f"  Class {class_idx}: {count} samples")
    
    # Determine target samples per class
    max_count = np.max(class_counts)
    if target_samples_per_class is None:
        target_samples_per_class = max_count * augmentation_factor
    
    print(f"Target samples per class: {target_samples_per_class}")
    
    # Create augmentation generator
    datagen = ImageDataGenerator(
        rotation_range=15,           # Rotate images up to 15 degrees
        width_shift_range=0.1,       # Shift horizontally up to 10%
        height_shift_range=0.1,      # Shift vertically up to 10%
        shear_range=0.1,            # Shear transformation
        zoom_range=0.1,             # Zoom in/out up to 10%
        horizontal_flip=True,       # Random horizontal flips
        brightness_range=[0.8, 1.2], # Brightness variation
        fill_mode='nearest'         # Fill strategy for new pixels
    )
    
    # Store augmented data
    X_balanced_list = []
    y_balanced_list = []
    
    # Process each class
    for class_idx in range(num_classes):
        print(f"\nProcessing class {class_idx}...")
        
        # Get all samples for this class
        class_mask = y_train_indices == class_idx
        X_class = X_train[class_mask]
        current_count = len(X_class)
        
        print(f"  Current samples: {current_count}")
        print(f"  Target samples: {target_samples_per_class}")
        
        if current_count == 0:
            print(f"  Warning: No samples for class {class_idx}")
            continue
        
        # Add original samples
        X_class_augmented = list(X_class)
        
        # Calculate how many more samples we need
        samples_needed = target_samples_per_class - current_count
        
        if samples_needed > 0:
            print(f"  Generating {samples_needed} augmented samples...")
            
            # Normalize images for augmentation (0-255 -> 0-1)
            X_class_norm = X_class.astype('float32') / 255.0
            
            # Generate augmented samples
            augmented_count = 0
            batch_size = min(32, current_count)  # Process in batches
            
            while augmented_count < samples_needed:
                # How many samples to generate in this batch
                batch_samples_needed = min(batch_size, samples_needed - augmented_count)
                
                # Randomly select source images for this batch
                source_indices = np.random.choice(current_count, size=batch_samples_needed, replace=True)
                X_batch = X_class_norm[source_indices]
                
                # Generate augmented images
                aug_iter = datagen.flow(X_batch, batch_size=batch_samples_needed, shuffle=False)
                X_aug_batch = next(aug_iter)
                
                # Convert back to 0-255 range
                X_aug_batch = (X_aug_batch * 255).astype(np.uint8)
                
                # Add to our collection
                for img in X_aug_batch:
                    if augmented_count < samples_needed:
                        X_class_augmented.append(img)
                        augmented_count += 1
                
                # Progress update
                if augmented_count % 100 == 0 or augmented_count >= samples_needed:
                    print(f"    Generated {augmented_count}/{samples_needed} samples")
        
        elif samples_needed < 0:
            # Randomly downsample if we have too many
            print(f"  Downsampling from {current_count} to {target_samples_per_class}")
            indices = np.random.choice(current_count, size=target_samples_per_class, replace=False)
            X_class_augmented = [X_class[i] for i in indices]
        
        # Convert to numpy array and add to balanced dataset
        X_class_final = np.array(X_class_augmented)
        y_class_final = np.full(len(X_class_final), class_idx)
        
        X_balanced_list.append(X_class_final)
        y_balanced_list.append(y_class_final)
        
        print(f"  Final samples for class {class_idx}: {len(X_class_final)}")
        
        # Clean up memory
        del X_class_augmented, X_class_final
        gc.collect()
    
    # Combine all classes
    print("\nCombining all classes...")
    X_train_balanced = np.concatenate(X_balanced_list, axis=0)
    y_train_indices_balanced = np.concatenate(y_balanced_list, axis=0)
    
    # Convert back to one-hot encoding
    from tensorflow import keras
    y_train_balanced = keras.utils.to_categorical(y_train_indices_balanced, num_classes)
    
    # Shuffle the combined dataset
    print("Shuffling combined dataset...")
    shuffle_indices = np.random.permutation(len(X_train_balanced))
    X_train_balanced = X_train_balanced[shuffle_indices]
    y_train_balanced = y_train_balanced[shuffle_indices]
    
    print(f"\nFinal balanced dataset:")
    print(f"  Shape: {X_train_balanced.shape}")
    print(f"  Labels shape: {y_train_balanced.shape}")
    
    # Verify class balance
    final_indices = np.argmax(y_train_balanced, axis=1)
    final_unique, final_counts = np.unique(final_indices, return_counts=True)
    
    print("Final class distribution:")
    for class_idx, count in zip(final_unique, final_counts):
        print(f"  Class {class_idx}: {count} samples")
    
    # Clean up memory
    del X_balanced_list, y_balanced_list
    gc.collect()
    
    return X_train_balanced, y_train_balanced

def create_simple_augmentation(image):
    """
    Simple augmentation function for individual images
    """
    
    # Random rotation (-10 to 10 degrees)
    if np.random.random() > 0.5:
        angle = np.random.uniform(-10, 10)
        center = (image.shape[1]//2, image.shape[0]//2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        image = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))
    
    # Random horizontal flip
    if np.random.random() > 0.5:
        image = cv2.flip(image, 1)
    
    # Random brightness adjustment
    if np.random.random() > 0.5:
        brightness = np.random.uniform(0.8, 1.2)
        image = np.clip(image * brightness, 0, 255).astype(np.uint8)
    
    # Random zoom (scale between 0.9 and 1.1)
    if np.random.random() > 0.5:
        scale = np.random.uniform(0.9, 1.1)
        h, w = image.shape[:2]
        new_h, new_w = int(h * scale), int(w * scale)
        
        if scale > 1:
            # Zoom in - resize then crop center
            resized = cv2.resize(image, (new_w, new_h))
            start_x = (new_w - w) // 2
            start_y = (new_h - h) // 2
            image = resized[start_y:start_y+h, start_x:start_x+w]
        else:
            # Zoom out - resize then pad
            resized = cv2.resize(image, (new_w, new_h))
            # Create black canvas and place resized image in center
            canvas = np.zeros_like(image)
            start_x = (w - new_w) // 2
            start_y = (h - new_h) // 2
            canvas[start_y:start_y+new_h, start_x:start_x+new_w] = resized
            image = canvas
    
    return image

# Test usage:
X_train_balanced, y_train_balanced = augment_and_balance_data(X_train, y_train, target_samples_per_class=500)
print("Data augmentation and balancing functions loaded!")
print("Usage:")
print("X_train_balanced, y_train_balanced = augment_and_balance_data(X_train, y_train, target_samples_per_class=500)")

Starting data augmentation and class balancing...
Input data shape: (134, 224, 224, 3)
Input labels shape: (134, 5)
Current class distribution:
  Class 0: 19 samples
  Class 1: 23 samples
  Class 2: 29 samples
  Class 3: 32 samples
  Class 4: 31 samples
Target samples per class: 500

Processing class 0...
  Current samples: 19
  Target samples: 500
  Generating 481 augmented samples...
    Generated 481/481 samples
  Final samples for class 0: 500

Processing class 1...
  Current samples: 23
  Target samples: 500
  Generating 477 augmented samples...
    Generated 477/477 samples
  Final samples for class 1: 500

Processing class 2...
  Current samples: 29
  Target samples: 500
  Generating 471 augmented samples...
    Generated 471/471 samples
  Final samples for class 2: 500

Processing class 3...
  Current samples: 32
  Target samples: 500
  Generating 468 augmented samples...
    Generated 468/468 samples
  Final samples for class 3: 500

Processing class 4...
  Current samples: 31

In [33]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import (
    ResNet50, ResNet101, EfficientNetB0, EfficientNetB3, EfficientNetB5,
    DenseNet121, DenseNet169, ConvNeXtTiny, InceptionV3, VGG16
)
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

def create_body_morphology_model(base_model_name, input_shape, num_classes, architecture='multi_scale'):
    """
    Create transfer learning models optimized for deer body morphology analysis
    
    Args:
        base_model_name: Pre-trained model ('ResNet50', 'EfficientNetB3', etc.)
        input_shape: Input shape (224, 224, 3)
        num_classes: Number of age classes
        architecture: 'standard', 'multi_scale', 'attention', or 'boosted_attention'
    
    Returns:
        model: Compiled Keras model
    """
    
    # Model selection
    base_models = {
        'ResNet50': ResNet50,
        'ResNet101': ResNet101,
        'EfficientNetB0': EfficientNetB0,
        'EfficientNetB3': EfficientNetB3,
        'EfficientNetB5': EfficientNetB5,
        'DenseNet121': DenseNet121,
        'DenseNet169': DenseNet169,
        'ConvNeXtTiny': ConvNeXtTiny,
        'InceptionV3': InceptionV3,
        'VGG16': VGG16
    }
    
    if base_model_name not in base_models:
        raise ValueError(f"Unsupported model: {base_model_name}")
    
    print(f"Creating {base_model_name} model with {architecture} architecture...")
    
    # BOOSTED DENSENET169 ATTENTION - Enhanced version
    if architecture == 'boosted_attention' and base_model_name == 'DenseNet169':
        print("🚀 Creating BOOSTED DenseNet169 with enhanced attention mechanisms...")
        
        # Load pre-trained DenseNet169
        base_model = base_models[base_model_name](
            weights='imagenet',
            include_top=False,
            input_shape=input_shape
        )
        base_model.trainable = False
        
        inputs = base_model.input
        base_features = base_model.output  # Shape: (batch, H, W, 1664)
        
        # ============ MULTI-SCALE ATTENTION ============
        print("  Adding multi-scale attention for body parts...")
        
        # Global attention (full body proportions)
        global_attention = layers.Conv2D(1, 1, activation='sigmoid', name='global_body_attention')(base_features)
        global_attended = layers.Multiply(name='global_body_focus')([base_features, global_attention])
        
        # Regional attention (body segments: head/neck, torso, legs)
        regional_features = layers.AveragePooling2D(2, name='regional_pool')(base_features)
        regional_attention = layers.Conv2D(1, 1, activation='sigmoid', name='regional_body_attention')(regional_features)
        regional_attended = layers.Multiply(name='regional_body_focus')([regional_features, regional_attention])
        regional_upsampled = layers.UpSampling2D(2, name='regional_upsample')(regional_attended)
        
        # Local attention (fine details: antler development, facial features)
        local_features = layers.MaxPooling2D(4, name='local_pool')(base_features)
        local_attention = layers.Conv2D(1, 1, activation='sigmoid', name='local_detail_attention')(local_features)
        local_attended = layers.Multiply(name='local_detail_focus')([local_features, local_attention])
        local_upsampled = layers.UpSampling2D(4, name='local_upsample')(local_attended)
        
        # Combine multi-scale attention
        combined_attention = layers.Add(name='multi_scale_body_combine')([global_attended, regional_upsampled, local_upsampled])
        
        # ============ CHANNEL ATTENTION (SE Block) ============
        print("  Adding Squeeze-and-Excitation for feature importance...")
        
        def se_block(input_tensor, reduction=16, name_prefix='se'):
            channels = input_tensor.shape[-1]
            # Squeeze: Global average pooling
            squeeze = layers.GlobalAveragePooling2D(name=f'{name_prefix}_squeeze')(input_tensor)
            # Excitation: FC layers
            excitation = layers.Dense(channels // reduction, activation='relu', name=f'{name_prefix}_fc1')(squeeze)
            excitation = layers.Dense(channels, activation='sigmoid', name=f'{name_prefix}_fc2')(excitation)
            # Reshape and multiply
            excitation = layers.Reshape((1, 1, channels), name=f'{name_prefix}_reshape')(excitation)
            scaled = layers.Multiply(name=f'{name_prefix}_scale')([input_tensor, excitation])
            return scaled
        
        se_features = se_block(combined_attention, reduction=8, name_prefix='body_morphology_se')
        
        # ============ BODY PART SPECIFIC ATTENTION ============
        print("  Adding deer-specific body part attention...")
        
        def body_part_attention_block(features, part_name):
            part_conv = layers.Conv2D(256, 3, padding='same', activation='relu', 
                                    name=f'{part_name}_analysis')(features)
            part_attention = layers.Conv2D(1, 1, activation='sigmoid', 
                                         name=f'{part_name}_attention_map')(part_conv)
            part_attended = layers.Multiply(name=f'{part_name}_focused_features')([features, part_attention])
            return part_attended
        
        # Neck/head region (important for antler development)
        neck_features = body_part_attention_block(se_features, 'neck_head')
        # Torso/belly region (body condition indicator)
        torso_features = body_part_attention_block(se_features, 'torso_belly')
        # Legs region (proportional development)
        legs_features = body_part_attention_block(se_features, 'legs')
        
        # Combine body part features
        body_part_combined = layers.Add(name='body_parts_fusion')([neck_features, torso_features, legs_features])
        
        # ============ SPATIAL ATTENTION ============
        print("  Adding spatial attention for anatomical regions...")
        
        # Average and max pooling across channels for spatial focus
        avg_pool = layers.Lambda(lambda x: tf.reduce_mean(x, axis=-1, keepdims=True), 
                               name='spatial_avg')(body_part_combined)
        max_pool = layers.Lambda(lambda x: tf.reduce_max(x, axis=-1, keepdims=True), 
                               name='spatial_max')(body_part_combined)
        
        # Learn spatial attention map
        spatial_concat = layers.Concatenate(axis=-1, name='spatial_concat')([avg_pool, max_pool])
        spatial_attention = layers.Conv2D(1, 7, padding='same', activation='sigmoid', 
                                        name='spatial_attention_map')(spatial_concat)
        
        # Apply spatial attention
        spatial_attended = layers.Multiply(name='spatially_focused_features')([body_part_combined, spatial_attention])
        
        # ============ ENHANCED FEATURE POOLING ============
        print("  Adding enhanced feature extraction...")
        
        # Multiple pooling strategies for comprehensive feature extraction
        global_avg = layers.GlobalAveragePooling2D(name='comprehensive_global_avg')(spatial_attended)
        global_max = layers.GlobalMaxPooling2D(name='comprehensive_global_max')(spatial_attended)
        
        # Adaptive pooling for spatial relationships
        adaptive_pool = layers.AveragePooling2D(pool_size=2, name='adaptive_spatial_pool')(spatial_attended)
        adaptive_flat = layers.Flatten(name='adaptive_spatial_flat')(adaptive_pool)
        
        # Combine all pooling strategies
        combined_features = layers.Concatenate(name='comprehensive_feature_fusion')([global_avg, global_max, adaptive_flat])
        
        # ============ ENHANCED CLASSIFICATION HEAD ============
        print("  Building enhanced classification layers...")
        
        x = combined_features
        
        # Progressive dense layers with residual connections and high capacity
        dense_sizes = [1536, 1024, 512, 256]
        
        for i, units in enumerate(dense_sizes):
            # Main dense layer
            main = layers.Dense(units, activation='relu', name=f'boosted_dense_{i+1}')(x)
            main = layers.BatchNormalization(name=f'boosted_bn_{i+1}')(main)
            main = layers.Dropout(0.4 + i*0.1, name=f'boosted_dropout_{i+1}')(main)  # Increasing dropout
            
            # Residual connection if dimensions allow
            if x.shape[-1] == units:
                x = layers.Add(name=f'boosted_residual_{i+1}')([x, main])
            else:
                x = main
        
        # Deer morphology understanding layer
        x = layers.Dense(128, activation='relu', name='deer_morphology_understanding')(x)
        x = layers.BatchNormalization(name='deer_morphology_bn')(x)
        x = layers.Dropout(0.3, name='deer_morphology_dropout')(x)
        
        # Final age prediction
        predictions = layers.Dense(num_classes, activation='softmax', name='enhanced_age_prediction')(x)
        
        model = keras.Model(inputs=inputs, outputs=predictions, name='BoostedDenseNet169_SuperAttention')
        
        print(f"✅ Boosted DenseNet169 created with {model.count_params():,} parameters")
        return model
    
    # Load pre-trained base model for other architectures
    base_model = base_models[base_model_name](
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )
    
    # Freeze base model initially
    base_model.trainable = False
    
    if architecture == 'multi_scale':
        # Multi-scale approach for body parts analysis (neck, legs, belly)
        base_features = base_model.output
        
        # Global features (overall body shape and proportions)
        global_pool = layers.GlobalAveragePooling2D(name='global_body_shape')(base_features)
        
        # Local features (specific body parts: neck thickness, leg length, belly width)
        local_pool = layers.GlobalMaxPooling2D(name='local_body_parts')(base_features)
        
        # Spatial features (preserve spatial relationships between body parts)
        spatial_pool = layers.AveragePooling2D(pool_size=4, name='spatial_relationships')(base_features)
        spatial_flat = layers.Flatten(name='flatten_spatial')(spatial_pool)
        
        # Combine all feature types for comprehensive body analysis
        combined = layers.Concatenate(name='body_feature_fusion')([global_pool, local_pool, spatial_flat])
        
        # Body morphology analysis layers
        x = layers.Dense(1024, activation='relu', name='morphology_analysis')(combined)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(0.4)(x)
        
        # Deer-specific body characteristics
        x = layers.Dense(512, activation='relu', name='deer_body_features')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(0.3)(x)
        
        # Age-related morphological features
        x = layers.Dense(256, activation='relu', name='age_morphology')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(0.2)(x)
        
        # Final age classification
        predictions = layers.Dense(num_classes, activation='softmax', name='age_classification')(x)
        
        model = keras.Model(inputs=base_model.input, outputs=predictions, name=f'{base_model_name}_MultiScale')
        
    elif architecture == 'attention':
        # Attention mechanism to focus on important body regions
        base_features = base_model.output
        
        # Spatial attention
        attention_weights = layers.Conv2D(1, 1, activation='sigmoid', name='attention_map')(base_features)
        attended_features = layers.Multiply(name='attended_features')([base_features, attention_weights])
        
        # Global pooling of attended features
        global_features = layers.GlobalAveragePooling2D(name='attended_global')(attended_features)
        
        # Regular global features for comparison
        regular_features = layers.GlobalAveragePooling2D(name='regular_global')(base_features)
        
        # Combine attended and regular features
        combined = layers.Concatenate(name='attention_combined')([global_features, regular_features])
        
        # Classification layers
        x = layers.Dense(512, activation='relu', name='attention_analysis')(combined)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(0.4)(x)
        
        x = layers.Dense(256, activation='relu', name='body_understanding')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(0.3)(x)
        
        predictions = layers.Dense(num_classes, activation='softmax', name='age_prediction')(x)
        
        model = keras.Model(inputs=base_model.input, outputs=predictions, name=f'{base_model_name}_Attention')
        
    else:  # 'standard'
        # Standard approach
        model = keras.Sequential([
            base_model,
            layers.GlobalAveragePooling2D(),
            layers.BatchNormalization(),
            layers.Dropout(0.3),
            
            layers.Dense(512, activation='relu', name='body_features'),
            layers.BatchNormalization(),
            layers.Dropout(0.4),
            
            layers.Dense(256, activation='relu', name='morphology_features'),
            layers.BatchNormalization(),
            layers.Dropout(0.3),
            
            layers.Dense(num_classes, activation='softmax', name='age_prediction')
        ], name=f'{base_model_name}_Standard')
    
    return model

def get_preprocessing_function(model_name):
    """Get the appropriate preprocessing function for each model"""
    
    preprocessing_map = {
        'ResNet50': tf.keras.applications.resnet50.preprocess_input,
        'ResNet101': tf.keras.applications.resnet.preprocess_input,
        'EfficientNetB0': tf.keras.applications.efficientnet.preprocess_input,
        'EfficientNetB3': tf.keras.applications.efficientnet.preprocess_input,
        'EfficientNetB5': tf.keras.applications.efficientnet.preprocess_input,
        'DenseNet121': tf.keras.applications.densenet.preprocess_input,
        'DenseNet169': tf.keras.applications.densenet.preprocess_input,
        'ConvNeXtTiny': tf.keras.applications.convnext.preprocess_input,
        'InceptionV3': tf.keras.applications.inception_v3.preprocess_input,
        'VGG16': tf.keras.applications.vgg16.preprocess_input,
    }
    
    return preprocessing_map.get(model_name, lambda x: x.astype('float32') / 255.0)

def train_transfer_learning_model(model, X_train, y_train, X_val, y_val, model_name, 
                                  preprocessing_fn=None, epochs_phase1=15, epochs_phase2=10, 
                                  use_advanced_training=False):
    """
    Two-phase training: frozen features then fine-tuning
    Enhanced training for boosted models
    
    Args:
        model: Keras model
        X_train, y_train: Training data and labels
        X_val, y_val: Validation data and labels
        model_name: Name for logging
        preprocessing_fn: Preprocessing function for the model
        epochs_phase1: Epochs for phase 1 (frozen base)
        epochs_phase2: Epochs for phase 2 (fine-tuning)
        use_advanced_training: Use enhanced training for boosted models
    
    Returns:
        model: Trained model
        history: Training history
    """
    
    print(f"\n=== TRAINING {model_name} ===")
    print(f"Training data: {X_train.shape}")
    print(f"Validation data: {X_val.shape}")
    
    # Preprocess data
    if preprocessing_fn is not None:
        print("Applying model-specific preprocessing...")
        X_train_prep = preprocessing_fn(X_train.astype('float32'))
        X_val_prep = preprocessing_fn(X_val.astype('float32'))
    else:
        print("Using standard normalization...")
        X_train_prep = X_train.astype('float32') / 255.0
        X_val_prep = X_val.astype('float32') / 255.0
    
    # Enhanced training for boosted models
    if use_advanced_training and 'Boosted' in model_name:
        print("🚀 Using ADVANCED TRAINING for boosted model...")
        
        # Apply label smoothing for better generalization
        def apply_label_smoothing(y, smoothing=0.1):
            num_classes = y.shape[1]
            return y * (1 - smoothing) + smoothing / num_classes
        
        y_train_smooth = apply_label_smoothing(y_train, smoothing=0.1)
        
        # Apply mixup augmentation
        def apply_mixup(X, y, alpha=0.2):
            batch_size = X.shape[0]
            lam = np.random.beta(alpha, alpha, batch_size)
            lam = np.maximum(lam, 1 - lam)
            
            lam_img = lam.reshape(-1, 1, 1, 1)
            lam_label = lam.reshape(-1, 1)
            
            indices = np.random.permutation(batch_size)
            
            X_mixed = lam_img * X + (1 - lam_img) * X[indices]
            y_mixed = lam_label * y + (1 - lam_label) * y[indices]
            
            return X_mixed, y_mixed
        
        print("  Applying mixup augmentation...")
        X_train_mixed, y_train_mixed = apply_mixup(X_train_prep, y_train_smooth, alpha=0.2)
        
        # Use mixed data for training
        X_train_final = X_train_mixed
        y_train_final = y_train_mixed
        
        # Enhanced callbacks
        callbacks_phase1 = [
            keras.callbacks.EarlyStopping(
                monitor='val_accuracy',
                patience=8,
                restore_best_weights=True,
                verbose=1
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.3,
                patience=3,
                min_lr=1e-8,
                verbose=1
            )
        ]
        
        callbacks_phase2 = [
            keras.callbacks.EarlyStopping(
                monitor='val_accuracy',
                patience=10,
                restore_best_weights=True,
                verbose=1
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.2,
                patience=2,
                min_lr=1e-9,
                verbose=1
            )
        ]
        
        # Use label smoothing loss
        loss_fn = keras.losses.CategoricalCrossentropy(label_smoothing=0.1)
        
    else:
        # Standard training
        X_train_final = X_train_prep
        y_train_final = y_train
        loss_fn = 'categorical_crossentropy'
        
        callbacks_phase1 = [
            keras.callbacks.EarlyStopping(
                monitor='val_accuracy',
                patience=5,
                restore_best_weights=True,
                verbose=1
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=3,
                min_lr=1e-7,
                verbose=1
            )
        ]
        
        callbacks_phase2 = [
            keras.callbacks.EarlyStopping(
                monitor='val_accuracy',
                patience=7,
                restore_best_weights=True,
                verbose=1
            ),
            keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.3,
                patience=3,
                min_lr=1e-8,
                verbose=1
            )
        ]
    
    # Phase 1: Train classifier head only
    print("\nPHASE 1: Training classifier head (base model frozen)...")
    
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss=loss_fn,
        metrics=['accuracy']
    )
    
    history_1 = model.fit(
        X_train_final, y_train_final,
        batch_size=16,
        epochs=epochs_phase1,
        validation_data=(X_val_prep, y_val),
        callbacks=callbacks_phase1,
        verbose=1
    )
    
    # Phase 2: Fine-tune top layers
    print("\nPHASE 2: Fine-tuning top layers...")
    
    # Unfreeze top layers of base model
    base_model = None
    if hasattr(model, 'layers') and len(model.layers) > 0:
        if hasattr(model.layers[0], 'layers'):  # Sequential or Functional model
            base_model = model.layers[0]
        else:
            # Find the base model layer
            for layer in model.layers:
                if hasattr(layer, 'layers') and len(layer.layers) > 10:
                    base_model = layer
                    break
    
    if base_model is not None and hasattr(base_model, 'layers'):
        total_layers = len(base_model.layers)
        
        # More aggressive unfreezing for boosted models
        if 'Boosted' in model_name:
            unfreeze_from = int(total_layers * 0.7)  # Unfreeze top 30%
        else:
            unfreeze_from = int(total_layers * 0.8)  # Unfreeze top 20%
        
        for layer in base_model.layers[unfreeze_from:]:
            layer.trainable = True
        
        print(f"Unfroze {total_layers - unfreeze_from} layers out of {total_layers}")
    else:
        print("Could not find base model for fine-tuning")
    
    # Recompile with lower learning rate
    fine_tune_lr = 0.00005 if 'Boosted' in model_name else 0.0001
    
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=fine_tune_lr),
        loss=loss_fn,
        metrics=['accuracy']
    )
    
    # Smaller batch size for fine-tuning stability
    fine_tune_batch = 4 if 'Boosted' in model_name else 8
    
    history_2 = model.fit(
        X_train_final, y_train_final,
        batch_size=fine_tune_batch,
        epochs=epochs_phase2,
        validation_data=(X_val_prep, y_val),
        callbacks=callbacks_phase2,
        verbose=1
    )
    
    # Optional Phase 3 for boosted models
    if use_advanced_training and 'Boosted' in model_name:
        print("\nPHASE 3: Ultra-fine-tuning (boosted model only)...")
        
        # Unfreeze even more layers
        if base_model is not None:
            unfreeze_from_phase3 = int(total_layers * 0.5)  # Unfreeze top 50%
            for layer in base_model.layers[unfreeze_from_phase3:]:
                layer.trainable = True
            print(f"Phase 3: Unfroze {total_layers - unfreeze_from_phase3} layers out of {total_layers}")
        
        # Very low learning rate
        model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=0.000005),
            loss=loss_fn,
            metrics=['accuracy']
        )
        
        history_3 = model.fit(
            X_train_final, y_train_final,
            batch_size=2,
            epochs=5,
            validation_data=(X_val_prep, y_val),
            callbacks=[
                keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=3, restore_best_weights=True)
            ],
            verbose=1
        )
        
        # Combine histories
        history = {
            'phase1': history_1,
            'phase2': history_2,
            'phase3': history_3
        }
    else:
        # Combine histories
        history = {
            'phase1': history_1,
            'phase2': history_2
        }
    
    # Preprocess data
    if preprocessing_fn is not None:
        print("Applying model-specific preprocessing...")
        X_train_prep = preprocessing_fn(X_train.astype('float32'))
        X_val_prep = preprocessing_fn(X_val.astype('float32'))
    else:
        print("Using standard normalization...")
        X_train_prep = X_train.astype('float32') / 255.0
        X_val_prep = X_val.astype('float32') / 255.0
        X_val_prep = preprocessing_fn(X_val.astype('float32'))
    else:
        print("Using standard normalization...")
        X_train_prep = X_train.astype('float32') / 255.0
        X_val_prep = X_val.astype('float32') / 255.0
    
    # Phase 1: Train classifier head only
    print("\nPHASE 1: Training classifier head (base model frozen)...")
    
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    
    callbacks_phase1 = [
        keras.callbacks.EarlyStopping(
            monitor='val_accuracy',
            patience=5,
            restore_best_weights=True,
            verbose=1
        ),
        keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=3,
            min_lr=1e-7,
            verbose=1
        )
    ]
    
    history_1 = model.fit(
        X_train_prep, y_train,
        batch_size=16,
        epochs=epochs_phase1,
        validation_data=(X_val_prep, y_val),
        callbacks=callbacks_phase1,
        verbose=1
    )
    
    # Phase 2: Fine-tune top layers
    print("\nPHASE 2: Fine-tuning top layers...")
    
    # Unfreeze top 20% of base model layers
    base_model = None
    if hasattr(model, 'layers') and len(model.layers) > 0:
        if hasattr(model.layers[0], 'layers'):  # Sequential or Functional model
            base_model = model.layers[0]
        else:
            # Find the base model layer
            for layer in model.layers:
                if hasattr(layer, 'layers') and len(layer.layers) > 10:
                    base_model = layer
                    break
    
    if base_model is not None and hasattr(base_model, 'layers'):
        total_layers = len(base_model.layers)
        unfreeze_from = int(total_layers * 0.8)  # Unfreeze top 20%
        
        for layer in base_model.layers[unfreeze_from:]:
            layer.trainable = True
        
        print(f"Unfroze {total_layers - unfreeze_from} layers out of {total_layers}")
    else:
        print("Could not find base model for fine-tuning")
    
    # Recompile with lower learning rate
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.0001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    
    callbacks_phase2 = [
        keras.callbacks.EarlyStopping(
            monitor='val_accuracy',
            patience=7,
            restore_best_weights=True,
            verbose=1
        ),
        keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.3,
            patience=3,
            min_lr=1e-8,
            verbose=1
        )
    ]
    
    history_2 = model.fit(
        X_train_prep, y_train,
        batch_size=8,  # Smaller batch for stability
        epochs=epochs_phase2,
        validation_data=(X_val_prep, y_val),
        callbacks=callbacks_phase2,
        verbose=1
    )
    
    # Combine histories
    history = {
        'phase1': history_1,
        'phase2': history_2
    }
    
    return model, history

def evaluate_model(model, X_test, y_test, label_mapping, model_name, preprocessing_fn=None):
    """
    Comprehensive model evaluation
    
    Args:
        model: Trained model
        X_test, y_test: Test data and labels
        label_mapping: Dictionary mapping class indices to ages
        model_name: Name for reporting
        preprocessing_fn: Preprocessing function
    
    Returns:
        results: Dictionary with evaluation metrics
    """
    
    print(f"\n=== EVALUATING {model_name} ===")
    
    # Preprocess test data
    if preprocessing_fn is not None:
        X_test_prep = preprocessing_fn(X_test.astype('float32'))
    else:
        X_test_prep = X_test.astype('float32') / 255.0
    
    # Get predictions
    test_loss, test_acc = model.evaluate(X_test_prep, y_test, verbose=0)
    predictions = model.predict(X_test_prep, verbose=0)
    
    # Convert to class indices
    y_test_indices = np.argmax(y_test, axis=1)
    pred_indices = np.argmax(predictions, axis=1)
    
    # Check class diversity
    unique_preds, pred_counts = np.unique(pred_indices, return_counts=True)
    unique_true, true_counts = np.unique(y_test_indices, return_counts=True)
    
    # Calculate metrics
    diversity_score = len(unique_preds) / len(unique_true)
    
    print(f"Test Accuracy: {test_acc:.3f} ({test_acc:.1%})")
    print(f"Test Loss: {test_loss:.3f}")
    print(f"Classes predicted: {len(unique_preds)}/{len(unique_true)}")
    print(f"Diversity score: {diversity_score:.3f}")
    
    # Show prediction distribution
    reverse_mapping = {idx: age for age, idx in label_mapping.items()}
    
    print("Prediction distribution:")
    for class_idx, count in zip(unique_preds, pred_counts):
        age = reverse_mapping.get(class_idx, f"Class_{class_idx}")
        print(f"  Age {age}: {count} predictions")
    
    # Classification report
    target_names = [f"Age_{reverse_mapping.get(i, i)}" for i in range(len(label_mapping))]
    class_report = classification_report(y_test_indices, pred_indices, 
                                       target_names=target_names, 
                                       output_dict=True, zero_division=0)
    
    print("\\nClassification Report:")
    print(classification_report(y_test_indices, pred_indices, 
                              target_names=target_names, zero_division=0))
    
    # Success assessment
    if test_acc > 0.467:  # Better than RandomForest baseline
        status = "✅ BETTER than RandomForest (46.7%)"
    elif diversity_score >= 0.7:
        status = "📊 Good diversity, reasonable performance"
    elif test_acc > 0.35:
        status = "📈 Shows promise, needs improvement"
    else:
        status = "⚠️ Needs significant improvement"
    
    print(f"Assessment: {status}")
    
    return {
        'test_accuracy': test_acc,
        'test_loss': test_loss,
        'diversity_score': diversity_score,
        'predictions': pred_indices,
        'classification_report': class_report,
        'status': status
    }

def compare_transfer_learning_models(X_train, y_train, X_val, y_val, X_test, y_test, label_mapping):
    """
    Compare multiple transfer learning models for deer body morphology
    
    Args:
        X_train, y_train: Training data
        X_val, y_val: Validation data  
        X_test, y_test: Test data
        label_mapping: Class to age mapping
    
    Returns:
        results: Dictionary with results for each model
    """
    
    print("=== DEER BODY MORPHOLOGY TRANSFER LEARNING COMPARISON ===")
    print(f"Training data: {X_train.shape}")
    print(f"Validation data: {X_val.shape}")
    print(f"Test data: {X_test.shape}")
    print(f"Number of classes: {len(label_mapping)}")
    
    # Models to test (optimized for body morphology)
    models_to_test = {
        'EfficientNetB3_MultiScale': ('EfficientNetB3', 'multi_scale'),
        'ResNet50_MultiScale': ('ResNet50', 'multi_scale'),
        'DenseNet169_Attention': ('DenseNet169', 'attention'),
        'DenseNet169_BOOSTED': ('DenseNet169', 'boosted_attention'),  # 🚀 BOOSTED VERSION
        'ConvNeXtTiny_Standard': ('ConvNeXtTiny', 'standard'),
    }
    
    results = {}
    
    for model_name, (base_arch, architecture) in models_to_test.items():
        try:
            print(f"\\n{'='*60}")
            print(f"TESTING {model_name}")
            print(f"{'='*60}")
            
            # Create model
            model = create_body_morphology_model(
                base_arch, 
                (224, 224, 3), 
                len(label_mapping), 
                architecture
            )
            
            # Get preprocessing function
            preprocessing_fn = get_preprocessing_function(base_arch)
            
            # Train model
            trained_model, history = train_transfer_learning_model(
                model, X_train, y_train, X_val, y_val, 
                model_name, preprocessing_fn,
                use_advanced_training=('BOOSTED' in model_name)  # Enhanced training for boosted models
            )
            
            # Evaluate model
            eval_results = evaluate_model(
                trained_model, X_test, y_test, label_mapping, 
                model_name, preprocessing_fn
            )
            
            # Store results
            results[model_name] = {
                'model': trained_model,
                'history': history,
                'evaluation': eval_results
            }
            
            # Clean up memory
            del model, trained_model
            import gc
            gc.collect()
            
        except Exception as e:
            print(f"Error training {model_name}: {e}")
            continue
    
    # Final comparison
    print(f"\\n{'='*60}")
    print("FINAL COMPARISON")
    print(f"{'='*60}")
    print("RandomForest Baseline: 46.7%")
    
    if results:
        sorted_results = sorted(
            results.items(), 
            key=lambda x: x[1]['evaluation']['test_accuracy'], 
            reverse=True
        )
        
        best_acc = sorted_results[0][1]['evaluation']['test_accuracy']
        
        for model_name, result in sorted_results:
            acc = result['evaluation']['test_accuracy']
            diversity = result['evaluation']['diversity_score']
            status = result['evaluation']['status']
            
            print(f"{model_name}: {acc:.1%} (diversity: {diversity:.2f}) - {status}")
        
        print(f"\\n💡 KEY INSIGHTS:")
        if best_acc > 0.467:
            print(f"   ✅ Body morphology transfer learning SUCCESS!")
            print(f"   🎯 Best model achieved {best_acc:.1%} vs RandomForest 46.7%")
            print(f"   🏆 Transfer learning beats domain-specific features!")
            
            # Check if boosted model won
            best_model_name = sorted_results[0][0]
            if 'BOOSTED' in best_model_name:
                print(f"   🚀 BOOSTED DenseNet169 DOMINATED the competition!")
                print(f"   🔥 Enhanced attention mechanisms paid off!")
            
        elif best_acc > 0.35:
            print(f"   📈 Body morphology shows promise ({best_acc:.1%})")
            print(f"   🔄 Consider more data or different architectures")
            
            # Suggest boosted approach if not already used
            if not any('BOOSTED' in name for name, _ in sorted_results):
                print(f"   💡 Try the BOOSTED DenseNet169 for better performance!")
        else:
            print(f"   🤔 Your RandomForest domain expertise still wins!")
            print(f"   📊 Transfer learning may not be ideal for this specific task")
            
        print(f"\\n🎯 BOOSTED MODEL PERFORMANCE:")
        boosted_results = [r for r in sorted_results if 'BOOSTED' in r[0]]
        if boosted_results:
            boosted_name, boosted_result = boosted_results[0]
            boosted_acc = boosted_result['evaluation']['test_accuracy']
            boosted_diversity = boosted_result['evaluation']['diversity_score']
            print(f"   {boosted_name}: {boosted_acc:.1%} (diversity: {boosted_diversity:.2f})")
            
            if boosted_acc > 0.467:
                improvement = boosted_acc - 0.467
                print(f"   🚀 Boosted model BEATS RandomForest by {improvement:.1%}!")
                print(f"   🏆 Multi-scale + body-part + spatial attention = WINNER!")
            else:
                print(f"   📊 Boosted model shows promise but needs more optimization")
        else:
            print(f"   ⚠️ Boosted model not tested - try running again!")
            
    else:
        print("No models completed successfully")
    
    return results

# Usage example:
print("Transfer learning models for deer body morphology loaded!")
print("🚀 NOW INCLUDES BOOSTED DenseNet169 with enhanced attention!")
print("\\nUsage:")
print("results = compare_transfer_learning_models(X_train_balanced, y_train_balanced, X_val, y_val, X_test, y_test, mapping)")
print("\\n🎯 The boosted DenseNet169 includes:")
print("   • Multi-scale attention (global + regional + local)")
print("   • Squeeze-and-Excitation channel attention")  
print("   • Body-part specific attention (neck, torso, legs)")
print("   • Spatial attention for anatomical regions")
print("   • Enhanced training with label smoothing + mixup")
print("   • 3-phase progressive training")
print("\\n🏆 Should significantly outperform the standard DenseNet169!")

SyntaxError: invalid syntax (3042470459.py, line 567)