In [1]:
import os
import numpy as np
import h5py
import time
import gc
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import json
import csv
import pandas as pd
from datetime import datetime
from tqdm import tqdm


# Scikit-learn imports
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, top_k_accuracy_score, precision_recall_curve, roc_auc_score
from sklearn.preprocessing import label_binarize

# PyTorch imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision.transforms.functional import to_pil_image
from torchvision import models
import torch.nn.functional as F
from torchcam.methods import GradCAM
from torchvision import transforms
from torchcam.utils import overlay_mask

First, the list of chosen bird species is defined:

In [2]:
# Bird classes handled by this notebook, written as Genus_species.
# The position of a name in this list is used as its class index when
# axis ticks and titles are labelled in the plots.
species = [
    'Ciconia_ciconia',
    'Columba_livia',
    'Streptopelia_decaocto',
    'Emberiza_calandra',
    'Carduelis_carduelis',
    'Serinus_serinus',
    'Delichon_urbicum',
    'Hirundo_rustica',
    'Passer_domesticus',
    'Sturnus_unicolor',
    'Turdus_merula',
]

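Next, the paths (dataset, saved models, result images) and the training settings (batch sizes, number of cross-validation folds, epochs, number of classes and compute device) are defined.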

In [3]:
# --- Paths -------------------------------------------------------------
DATA_DIR = "new_dataset"                 # folder containing the HDF5 dataset
DATASET = 'dataset_20250506_145756.h5'   # dataset file name inside DATA_DIR
MODEL_SAVE_DIR = 'saved_models/phase1'   # search results and checkpoints
RESULT_DIR = 'images'                    # sub-folder of MODEL_SAVE_DIR for figures

# --- Training / search settings ----------------------------------------
BATCH_SIZE = [16]    # list: used as the 'batch_size' axis of the hyperparameter grid
N_SPLITS = 3         # number of cross-validation folds iterated by the grid search
NUM_EPOCHS = 5       # epochs per training run
NUM_CLASSES = 11     # output units of the classifier head

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


In [4]:
def openH5File(filepath, fold_idx=None):
    """Open the HDF5 dataset file and return handles to its splits.

    Args:
        filepath: Path of the HDF5 file to open in read-only mode.
        fold_idx: Index of the cross-validation fold to expose. When given,
            the group ``cross_validation/fold_{fold_idx}`` must contain
            ``X_train``, ``y_train``, ``X_val`` and ``y_val``. When ``None``
            only the test split is returned.

    Returns:
        dict mapping split names (``'X_train'``, ``'y_train'``, ``'X_val'``,
        ``'y_val'`` when a fold is requested, plus ``'X_test'`` and
        ``'y_test'``) to h5py dataset handles. The handles are lazy: the
        underlying file stays open so they can be read later, and it can be
        closed through any handle's ``.file`` attribute once the data has
        been copied out.

    Raises:
        ValueError: If the requested fold (or one of its splits) is missing.
        KeyError: If the ``test`` group or its datasets are missing.
    """
    file = h5py.File(filepath, 'r')
    try:
        datasets = {}

        if fold_idx is not None:
            try:
                fold_group = file[f'cross_validation/fold_{fold_idx}']
                for split in ('X_train', 'y_train', 'X_val', 'y_val'):
                    datasets[split] = fold_group[split]
            except KeyError as err:
                # Build the hint defensively: the 'cross_validation' group may
                # itself be absent, and indexing it blindly would mask the
                # intended ValueError with another KeyError.
                if 'cross_validation' in file:
                    available = list(file['cross_validation'].keys())
                else:
                    available = []
                raise ValueError(
                    f"Fold {fold_idx} not found in file. Available groups: {available}"
                ) from err

        datasets['X_test'] = file['test']['X_test']
        datasets['y_test'] = file['test']['y_test']
    except Exception:
        # Nothing is handed back to the caller on failure, so release the
        # file handle here instead of leaking it.
        file.close()
        raise
    return datasets



def createDataloaders(X_h5, y_h5, batch_size=None, shuffle=False):
    """Load one split fully into memory and wrap it in a ``DataLoader``.

    Args:
        X_h5: Array-like supporting ``[:]`` (e.g. an h5py dataset or a NumPy
            array). A 4-D input is treated as (N, H, W, C) and transposed to
            (N, C, H, W); inputs of any other rank are used as they are.
        y_h5: Array-like supporting ``[:]`` holding one label per sample.
        batch_size: Samples per batch. ``None`` falls back to the notebook's
            ``BATCH_SIZE`` setting. Because ``BATCH_SIZE`` is a one-element
            list (it doubles as a grid-search axis), a one-element list or
            tuple is accepted and unwrapped.
        shuffle: Whether the loader reshuffles the data every epoch.

    Returns:
        A ``DataLoader`` over a ``TensorDataset`` of float32 features and
        int64 labels.

    Raises:
        ValueError: If ``batch_size`` is a list/tuple that does not contain
            exactly one element.
    """
    # Resolve the default lazily so the function does not depend on
    # BATCH_SIZE at definition time.
    if batch_size is None:
        batch_size = BATCH_SIZE
    if isinstance(batch_size, (list, tuple)):
        if len(batch_size) != 1:
            raise ValueError(
                f"batch_size must be a single integer, got {batch_size!r}"
            )
        batch_size = batch_size[0]
    batch_size = int(batch_size)

    X_np = X_h5[:]  # (N, H, W, C)
    if X_np.ndim == 4:
        X_np = np.transpose(X_np, (0, 3, 1, 2))  # to (N, C, H, W)

    X_tensor = torch.from_numpy(X_np).float()
    y_tensor = torch.from_numpy(y_h5[:]).long()
    dataset = TensorDataset(X_tensor, y_tensor)
    return DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=4,
        pin_memory=True,
    )

def getDataloaders(filepath, fold_idx, batch_size):
    """Build the train, validation and test loaders for one fold.

    Args:
        filepath: Path of the HDF5 dataset file.
        fold_idx: Cross-validation fold to load, or ``None`` to load only
            the test split.
        batch_size: Batch size forwarded to every loader.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``. The first two are
        ``None`` when ``fold_idx`` is ``None``.
    """
    dataset = openH5File(filepath, fold_idx)
    # Every h5py dataset exposes the file it belongs to; keep a reference so
    # the file can be closed once the data has been copied into tensors.
    h5_file = getattr(dataset['X_test'], 'file', None)

    # Defined up front so the return statement is valid when no fold is requested.
    train_loader = None
    val_loader = None
    try:
        test_loader = createDataloaders(
            dataset['X_test'], dataset['y_test'], batch_size=batch_size, shuffle=False
        )
        if fold_idx is not None:
            train_loader = createDataloaders(
                dataset['X_train'], dataset['y_train'], batch_size=batch_size, shuffle=True
            )
            val_loader = createDataloaders(
                dataset['X_val'], dataset['y_val'], batch_size=batch_size, shuffle=False
            )
    finally:
        # createDataloaders reads each split fully into memory, so the HDF5
        # handle is no longer needed after this point.
        if h5_file is not None:
            h5_file.close()

    return train_loader, val_loader, test_loader

In [5]:
def getModel(name, nClasses, dropout_rate=0):
    """Create a torchvision EfficientNet with a new classification head.

    Args:
        name: ``'efficientnet_b0'`` or ``'efficientnet_V2'`` (the latter
            builds ``efficientnet_v2_s``).
        nClasses: Number of output units of the new final linear layer.
        dropout_rate: Dropout probability placed directly before the new
            linear layer.

    Returns:
        The model, loaded with torchvision's DEFAULT weights for the chosen
        architecture, with ``classifier[1]`` replaced by
        ``Sequential(Dropout, Linear)`` and moved to ``DEVICE``.

    Raises:
        ValueError: If ``name`` is not one of the supported architectures.
    """
    if name == 'efficientnet_b0':
        model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)
    elif name == 'efficientnet_V2':
        model = models.efficientnet_v2_s(weights=models.EfficientNet_V2_S_Weights.DEFAULT)
    else:
        # Fail with a clear message instead of an UnboundLocalError on `model`.
        raise ValueError(
            f"Unknown model name {name!r}; expected 'efficientnet_b0' or 'efficientnet_V2'."
        )

    # Both architectures keep their final Linear layer at classifier[1]; swap
    # it for a head sized to this task.
    in_features = model.classifier[1].in_features
    model.classifier[1] = nn.Sequential(
        nn.Dropout(dropout_rate),
        nn.Linear(in_features, nClasses)
    )
    return model.to(DEVICE)

def getOptimizer(model, params):
    """Build the optimizer selected by ``params['optimizer']``.

    Args:
        model: Module whose parameters are optimised.
        params: Mapping with the keys ``'optimizer'`` (``'adam'``, ``'sgd'``
            or ``'adamw'``), ``'learning_rate'`` and ``'weight_decay'``.

    Returns:
        An ``optim.Adam``, ``optim.SGD`` (momentum 0.9) or ``optim.AdamW``
        instance configured with the given learning rate and weight decay.

    Raises:
        ValueError: If the optimizer name is not supported.
        KeyError: If a required key is missing from ``params``.
    """
    name = params['optimizer']
    if name not in ('adam', 'sgd', 'adamw'):
        # Fail with a clear message instead of an UnboundLocalError.
        raise ValueError(
            f"Unknown optimizer {name!r}; expected 'adam', 'sgd' or 'adamw'."
        )

    lr = params['learning_rate']
    weight_decay = params['weight_decay']

    if name == 'adam':
        return optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    if name == 'sgd':
        return optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)
    return optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

def trainModel(model, train_loader, val_loader, params):
    """Train ``model`` for up to ``NUM_EPOCHS`` epochs with early stopping.

    Each epoch runs one pass over ``train_loader`` followed by an evaluation
    pass over ``val_loader``. Training stops early once the weighted
    validation F1 has not improved for ``THRESHOLD`` consecutive epochs.
    When training ends, the weights of the best-F1 epoch are loaded back
    into ``model`` so that the returned ``best_f1`` describes the model the
    caller holds.

    Args:
        model: Network to train; updated in place.
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
        val_loader: Loader yielding ``(inputs, labels)`` validation batches.
        params: Hyperparameters. Uses the keys read by ``getOptimizer`` plus
            the optional ``'scheduler'`` flag, which enables
            ``ReduceLROnPlateau`` (patience 3) on the validation loss.

    Returns:
        Tuple ``(history, best_f1)``. ``history`` maps ``'train_loss'``,
        ``'val_loss'`` and ``'val_f1'`` to per-epoch floats, and
        ``'train_acc'`` / ``'val_acc'`` to per-epoch 0-dim float64 CPU
        tensors. ``best_f1`` is the highest weighted validation F1 seen.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = getOptimizer(model, params)
    # .get(): a params dict without the flag simply trains without a scheduler.
    use_scheduler = params.get('scheduler')
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3) if use_scheduler else None

    best_f1 = 0
    best_state = None  # CPU copy of the weights from the best-F1 epoch
    THRESHOLD = 5      # epochs without F1 improvement tolerated before stopping
    improvementCounter = 0
    history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': [], 'val_f1': []}

    for epoch in range(NUM_EPOCHS):

        # Training phase
        model.train()
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            _, preds = torch.max(outputs, 1)
            # The criterion returns the batch mean; weight it by the batch
            # size so the epoch average is per sample.
            running_loss += loss.item() * inputs.size(0)
            running_corrects += (preds == labels).sum().item()

        n_train = len(train_loader.dataset)
        epoch_loss = running_loss / n_train
        # Stored as a CPU tensor: callers can still use .item(), and the
        # history no longer pins device memory.
        epoch_acc = torch.tensor(running_corrects / n_train, dtype=torch.float64)
        history['train_loss'].append(epoch_loss)
        history['train_acc'].append(epoch_acc)

        # Validation phase
        model.eval()
        val_running_loss = 0.0
        val_running_corrects = 0
        all_preds, all_labels = [], []

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                _, preds = torch.max(outputs, 1)

                val_running_loss += loss.item() * inputs.size(0)
                val_running_corrects += (preds == labels).sum().item()
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        n_val = len(val_loader.dataset)
        val_loss = val_running_loss / n_val
        val_acc = torch.tensor(val_running_corrects / n_val, dtype=torch.float64)
        val_f1 = f1_score(all_labels, all_preds, average='weighted')

        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['val_f1'].append(val_f1)

        if scheduler:
            scheduler.step(val_loss)

        if val_f1 > best_f1:
            best_f1 = val_f1
            improvementCounter = 0
            # Snapshot on the CPU so the copy costs no extra device memory.
            best_state = {
                key: value.detach().cpu().clone()
                for key, value in model.state_dict().items()
            }
        else:
            improvementCounter += 1
            if improvementCounter >= THRESHOLD:
                break

    # Hand back the weights that produced best_f1 rather than whatever the
    # final epoch left behind.
    if best_state is not None:
        model.load_state_dict(best_state)

    return history, best_f1

In [6]:
def gridSearch(filepath, n_splits, hyperparams):
    """Exhaustive cross-validated search over a hyperparameter grid.

    Every combination of the values in ``hyperparams`` is trained once per
    fold (folds ``1..n_splits``). The combination with the highest mean
    best-validation F1 is saved with ``torch.save``, and the full log is
    written as JSON and CSV into ``MODEL_SAVE_DIR``. All three files share
    one timestamp suffix.

    Args:
        filepath: Path of the HDF5 dataset file.
        n_splits: Number of cross-validation folds to iterate.
        hyperparams: Mapping from parameter name to the list of values to
            try. Each combination must provide ``'batch_size'`` and
            ``'model_name'`` plus the keys required by ``trainModel``;
            ``'dropout_rate'`` defaults to 0.

    Returns:
        None. Results are printed and written to disk.
    """
    # Generate all possible hyperparameter combinations
    keys, values = zip(*hyperparams.items())
    param_combinations = [dict(zip(keys, combo)) for combo in itertools.product(*values)]

    results_log = {
        "timestamp": datetime.now().isoformat(),
        "total_combinations": len(param_combinations),
        "best_f1": 0,
        "best_params": None,
        "all_results": []
    }
    print(f"\nBeginning GridSearch with {len(param_combinations)} combinations...")

    # DEVICE may be the CPU; resetting peak-memory statistics needs CUDA.
    cuda_available = torch.cuda.is_available()

    for params in tqdm(param_combinations):
        if cuda_available:
            torch.cuda.reset_peak_memory_stats()
        print("\n" + "="*50)
        print(f"Testing combination: {params}")
        fold_f1_scores = []
        fold_acc_scores = []
        start_time = time.time()

        # Cross-validation loop
        for fold_idx in range(1, n_splits + 1):
            train_loader, val_loader, test_loader = getDataloaders(filepath, fold_idx, params['batch_size'])
            model = getModel(params['model_name'], NUM_CLASSES, params.get('dropout_rate', 0))
            history, fold_f1 = trainModel(model, train_loader, val_loader, params)
            print(f"Fold {fold_idx} Best F1 Score: {fold_f1:.4f}")
            fold_f1_scores.append(float(fold_f1))
            # float() accepts both 0-dim tensors and plain numbers.
            fold_acc_scores.append(float(history['val_acc'][-1]))

            # Clear memory: the loaders hold the whole fold as tensors.
            del model, train_loader, val_loader, test_loader
            gc.collect()
            torch.cuda.empty_cache()

        # Aggregate the per-fold scores (plain floats keep the JSON log clean)
        avg_f1 = float(np.mean(fold_f1_scores))
        std_f1 = float(np.std(fold_f1_scores))
        avg_acc = float(np.mean(fold_acc_scores))
        std_acc = float(np.std(fold_acc_scores))
        time_taken = time.time() - start_time
        memory_used = torch.cuda.max_memory_allocated() / 1e9 if cuda_available else 0.0

        # Record this combination's results
        result_entry = {
            "params": params,
            "avg_f1": avg_f1,
            "std_f1": std_f1,
            "mean_acc": avg_acc,
            "std_acc": std_acc,
            "f1_scores": fold_f1_scores,
            "acc_scores": fold_acc_scores,
            "memory_used_GB": memory_used,
            "time_taken": time_taken
        }
        results_log["all_results"].append(result_entry)

        # Update best parameters if improved
        if avg_f1 > results_log["best_f1"]:
            results_log["best_f1"] = avg_f1
            results_log["best_params"] = params
            # Single quotes inside the f-string keep this line valid on
            # Python versions older than 3.12.
            print(f"New best parameters found with F1: {results_log['best_f1']:.4f}")

    # Finalize results
    print("\nGridSearch completed!")
    os.makedirs(MODEL_SAVE_DIR, exist_ok=True)
    # One timestamp for all artefacts so files from the same run can be matched.
    run_stamp = datetime.now().isoformat()
    torch.save(results_log["best_params"], os.path.join(MODEL_SAVE_DIR, f'gridsearch_setup1_{run_stamp}.pth'))

    # Save JSON log
    json_path = os.path.join(MODEL_SAVE_DIR, f"gridsearch_results_{run_stamp}.json")
    with open(json_path, 'w') as f:
        json.dump(results_log, f, indent=4)

    # Save CSV results
    csv_path = os.path.join(MODEL_SAVE_DIR, f"gridsearch_results_{run_stamp}.csv")
    header = ["params", "avg_f1", "std_f1", "mean_acc", "std_acc", "f1_scores", "acc_scores", "memory_used_GB", "time_taken"]
    with open(csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        for res in results_log["all_results"]:
            # 'params' is a dict; store its repr. Other columns are written as they are.
            writer.writerow([str(res["params"])] + [res[column] for column in header[1:]])

def bestTrainModel(filepath, best_params):
    """Train a model with the selected hyperparameters and score it on the test split.

    The model is trained on fold 1 of the dataset and then evaluated on the
    test split.

    Args:
        filepath: Path of the HDF5 dataset file.
        best_params: Hyperparameter dict (e.g. as saved by ``gridSearch``).
            Requires ``'model_name'`` and the keys used by ``trainModel``;
            ``'dropout_rate'`` defaults to 0 and ``'batch_size'`` to
            ``BATCH_SIZE[0]``.

    Returns:
        Tuple ``(model, history, metrics, cam_images)``. ``metrics`` holds
        ``'test_loss'``, ``'top1_accuracy'``, ``'top3_accuracy'``,
        ``'f1_score'`` (weighted), ``'confusion_matrix'``, ``'macro_auprc'``
        (macro-averaged average precision, one-vs-rest) and the
        ``'precision'`` / ``'recall'`` curve of class 0 as lists.
        ``cam_images`` is returned empty: this function does not compute
        Grad-CAM maps.
    """
    # Local imports: these names are not part of the notebook's import cell.
    import warnings
    from sklearn.metrics import average_precision_score

    model = getModel(
        best_params['model_name'],
        nClasses=NUM_CLASSES,
        dropout_rate=best_params.get('dropout_rate', 0),
    )
    # Train with the batch size that was tuned, not a separate constant.
    batch_size = best_params.get('batch_size', BATCH_SIZE[0])
    train_loader, val_loader, test_loader = getDataloaders(filepath, fold_idx=1, batch_size=batch_size)

    history, _ = trainModel(model, train_loader, val_loader, best_params)

    # Evaluate on test set
    model.eval()
    all_preds, all_labels = [], []
    all_probs = []
    cam_images = []
    criterion = nn.CrossEntropyLoss()
    test_loss = 0.0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)

            loss = criterion(outputs, labels)
            test_loss += loss.item() * inputs.size(0)
            total_samples += inputs.size(0)

            probs = torch.softmax(outputs, dim=1)
            preds = torch.argmax(probs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

    all_labels = np.array(all_labels)
    all_preds = np.array(all_preds)
    all_probs = np.array(all_probs)

    labels_range = list(range(NUM_CLASSES))
    # Passing labels= fixes the matrix at NUM_CLASSES x NUM_CLASSES even when
    # a class is absent from the test data, so it lines up with the class names.
    cm = confusion_matrix(all_labels, all_preds, labels=labels_range)
    f1 = f1_score(all_labels, all_preds, average='weighted')
    top1_acc = accuracy_score(all_labels, all_preds)
    test_loss /= total_samples

    try:
        top3_acc = top_k_accuracy_score(all_labels, all_probs, k=3, labels=labels_range)
    except ValueError as err:
        warnings.warn(f"top-3 accuracy could not be computed ({err}); reporting 0.0")
        top3_acc = 0.0  # fallback

    # Binarised outside the try block so the PR curve below can always use it.
    binarized_labels = label_binarize(all_labels, classes=labels_range)
    try:
        # Average precision is the area under the precision-recall curve;
        # roc_auc_score would report ROC-AUC under the 'macro_auprc' key.
        auprc_macro = float(average_precision_score(binarized_labels, all_probs, average='macro'))
    except Exception as err:
        warnings.warn(f"macro AUPRC could not be computed ({err}); reporting 0.0")
        auprc_macro = 0.0  # fallback

    # Compute PR curve for the first class (just for plotting)
    precision, recall, _ = precision_recall_curve(binarized_labels[:, 0], all_probs[:, 0])

    metrics = {
        'test_loss': test_loss,
        'top1_accuracy': top1_acc,
        'top3_accuracy': top3_acc,
        'f1_score': f1,
        'confusion_matrix': cm,
        'macro_auprc': auprc_macro,
        'precision': precision.tolist(),
        'recall': recall.tolist()
    }

    return model, history, metrics, cam_images

In [7]:
def plotting(history, cm, metrics_dict, species, cam_images=None):
    """Save a summary figure of a training run and up to three Grad-CAM overlays.

    The figure (2x3 grid) contains the loss curves, the confusion matrix,
    the precision-recall curve and, on the bottom row, up to three Grad-CAM
    overlays. Files are written to ``MODEL_SAVE_DIR/RESULT_DIR`` and share
    one timestamp suffix.

    Args:
        history: Dict with per-epoch ``'train_loss'`` and ``'val_loss'``.
        cm: Integer confusion matrix to render as a heatmap.
        metrics_dict: Dict with ``'recall'``, ``'precision'`` and
            ``'macro_auprc'``.
        species: Class names, used as tick labels and in panel titles.
        cam_images: Optional sequence of ``(img, activation, label)`` or
            ``(img, activation, label, predicted)`` tuples; only the first
            three are used. ``None`` or an empty sequence skips the
            Grad-CAM panels.
    """
    if cam_images is None:
        cam_images = []

    out_dir = os.path.join(MODEL_SAVE_DIR, RESULT_DIR)
    os.makedirs(out_dir, exist_ok=True)
    # One timestamp for every file written by this call.
    run_stamp = datetime.now().isoformat()

    # Build each overlay once; it is both drawn in the figure and saved alone.
    cam_panels = []
    for entry in cam_images[:3]:
        img, activation, label, *extra = entry
        overlay = overlay_mask(
            to_pil_image(img),
            to_pil_image(activation[0].squeeze(0), mode='F'),
            alpha=0.5
        )
        # The predicted class must be supplied by the caller: the argmax of a
        # CAM map is a pixel position, not a class index.
        predicted = extra[0] if extra else None
        cam_panels.append((overlay, label, predicted))

    fig = plt.figure(figsize=(24, 12))

    # Plot training history
    ax_loss = fig.add_subplot(2, 3, 1)
    ax_loss.plot(history['train_loss'], label='Train Loss')
    ax_loss.plot(history['val_loss'], label='Val Loss')
    ax_loss.set_title('Training History')
    ax_loss.set_xlabel('Epoch')
    ax_loss.set_ylabel('Loss')
    ax_loss.legend()

    # Plot confusion matrix
    ax_cm = fig.add_subplot(2, 3, 2)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=species, yticklabels=species, ax=ax_cm)
    ax_cm.set_title('Confusion Matrix')
    ax_cm.set_xlabel('Predicted')
    ax_cm.set_ylabel('True')

    # Plot precision-recall curve
    ax_pr = fig.add_subplot(2, 3, 3)
    ax_pr.plot(metrics_dict['recall'], metrics_dict['precision'], lw=2)
    ax_pr.set_xlabel('Recall')
    ax_pr.set_ylabel('Precision')
    ax_pr.set_title(f'Precision-Recall Curve (AUPRC: {metrics_dict["macro_auprc"]:.4f})')

    # Plot Grad-CAM visualizations on the bottom row
    for i, (overlay, label, predicted) in enumerate(cam_panels):
        ax_cam = fig.add_subplot(2, 3, 4 + i)
        ax_cam.imshow(overlay)
        title = f'True: {species[int(label)]}'
        if predicted is not None:
            title += f'\nPred: {species[int(predicted)]}'
        ax_cam.set_title(title)
        ax_cam.axis('off')

    fig.tight_layout()
    plot_path = os.path.join(out_dir, f"training_results_{run_stamp}.png")
    fig.savefig(plot_path, dpi=300)
    plt.close(fig)

    # Save individual Grad-CAM images
    for i, (overlay, _, _) in enumerate(cam_panels):
        overlay.save(os.path.join(out_dir, f"gradcam_{i}_{run_stamp}.png"))


In [None]:
# Main execution
# 1. Perform hyperparameter search

# Ensure directories exist (the search writes its logs into MODEL_SAVE_DIR)
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)
os.makedirs(os.path.join(MODEL_SAVE_DIR, RESULT_DIR), exist_ok=True)

# Search grid: every combination of the listed values is evaluated.
params = dict(
    model_name=['efficientnet_b0', 'efficientnet_V2'],
    learning_rate=[0.0005, 0.0001],
    batch_size=BATCH_SIZE,
    weight_decay=[0.0001, 0],
    optimizer=['adamw'],
    scheduler=[True],
    dropout_rate=[0, 0.2],
)

gridSearch(f"{DATA_DIR}/{DATASET}", N_SPLITS, params)


Beginning GridSearch with 16 combinations...


  0%|          | 0/16 [00:00<?, ?it/s]


Testing combination: {'model_name': 'efficientnet_b0', 'learning_rate': 0.0005, 'batch_size': 16, 'weight_decay': 0.0001, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0}
Fold 1 Best F1 Score: 0.7496
Fold 2 Best F1 Score: 0.7270


  6%|▋         | 1/16 [11:58<2:59:38, 718.59s/it]

Fold 3 Best F1 Score: 0.7552
New best parameters found with F1: 0.7439

Testing combination: {'model_name': 'efficientnet_b0', 'learning_rate': 0.0005, 'batch_size': 16, 'weight_decay': 0.0001, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0.2}
Fold 1 Best F1 Score: 0.7526
Fold 2 Best F1 Score: 0.7349


 12%|█▎        | 2/16 [23:59<2:47:59, 719.94s/it]

Fold 3 Best F1 Score: 0.7306

Testing combination: {'model_name': 'efficientnet_b0', 'learning_rate': 0.0005, 'batch_size': 16, 'weight_decay': 0, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0}
Fold 1 Best F1 Score: 0.7465
Fold 2 Best F1 Score: 0.7536


 19%|█▉        | 3/16 [35:59<2:35:59, 719.94s/it]

Fold 3 Best F1 Score: 0.7243

Testing combination: {'model_name': 'efficientnet_b0', 'learning_rate': 0.0005, 'batch_size': 16, 'weight_decay': 0, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0.2}
Fold 1 Best F1 Score: 0.7345
Fold 2 Best F1 Score: 0.7462


 25%|██▌       | 4/16 [47:59<2:23:59, 719.94s/it]

Fold 3 Best F1 Score: 0.7443

Testing combination: {'model_name': 'efficientnet_b0', 'learning_rate': 0.0001, 'batch_size': 16, 'weight_decay': 0.0001, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0}
Fold 1 Best F1 Score: 0.7770
Fold 2 Best F1 Score: 0.7936


 31%|███▏      | 5/16 [1:00:01<2:12:08, 720.74s/it]

Fold 3 Best F1 Score: 0.7826
New best parameters found with F1: 0.7844

Testing combination: {'model_name': 'efficientnet_b0', 'learning_rate': 0.0001, 'batch_size': 16, 'weight_decay': 0.0001, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0.2}
Fold 1 Best F1 Score: 0.7934
Fold 2 Best F1 Score: 0.7725


 38%|███▊      | 6/16 [1:12:03<2:00:12, 721.22s/it]

Fold 3 Best F1 Score: 0.7736

Testing combination: {'model_name': 'efficientnet_b0', 'learning_rate': 0.0001, 'batch_size': 16, 'weight_decay': 0, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0}
Fold 1 Best F1 Score: 0.7777
Fold 2 Best F1 Score: 0.7901


 44%|████▍     | 7/16 [1:24:04<1:48:09, 721.07s/it]

Fold 3 Best F1 Score: 0.7798

Testing combination: {'model_name': 'efficientnet_b0', 'learning_rate': 0.0001, 'batch_size': 16, 'weight_decay': 0, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0.2}
Fold 1 Best F1 Score: 0.7948
Fold 2 Best F1 Score: 0.7846


 50%|█████     | 8/16 [1:36:05<1:36:08, 721.04s/it]

Fold 3 Best F1 Score: 0.7813
New best parameters found with F1: 0.7869

Testing combination: {'model_name': 'efficientnet_V2', 'learning_rate': 0.0005, 'batch_size': 16, 'weight_decay': 0.0001, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0}
Downloading: "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth" to /home/w4ter/.cache/torch/hub/checkpoints/efficientnet_v2_s-dd5fe13b.pth


100%|██████████| 82.7M/82.7M [00:29<00:00, 2.95MB/s]


Fold 1 Best F1 Score: 0.7203
Fold 2 Best F1 Score: 0.7052


 56%|█████▋    | 9/16 [2:02:12<1:54:57, 985.40s/it]

Fold 3 Best F1 Score: 0.7238

Testing combination: {'model_name': 'efficientnet_V2', 'learning_rate': 0.0005, 'batch_size': 16, 'weight_decay': 0.0001, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0.2}
Fold 1 Best F1 Score: 0.7160
Fold 2 Best F1 Score: 0.7296


 62%|██████▎   | 10/16 [2:27:48<1:55:33, 1155.66s/it]

Fold 3 Best F1 Score: 0.7191

Testing combination: {'model_name': 'efficientnet_V2', 'learning_rate': 0.0005, 'batch_size': 16, 'weight_decay': 0, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0}
Fold 1 Best F1 Score: 0.7233
Fold 2 Best F1 Score: 0.7095


 69%|██████▉   | 11/16 [2:53:19<1:45:51, 1270.30s/it]

Fold 3 Best F1 Score: 0.7198

Testing combination: {'model_name': 'efficientnet_V2', 'learning_rate': 0.0005, 'batch_size': 16, 'weight_decay': 0, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0.2}
Fold 1 Best F1 Score: 0.7322
Fold 2 Best F1 Score: 0.7260


 75%|███████▌  | 12/16 [3:18:51<1:30:00, 1350.12s/it]

Fold 3 Best F1 Score: 0.7343

Testing combination: {'model_name': 'efficientnet_V2', 'learning_rate': 0.0001, 'batch_size': 16, 'weight_decay': 0.0001, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0}
Fold 1 Best F1 Score: 0.8340
Fold 2 Best F1 Score: 0.8195


 81%|████████▏ | 13/16 [3:44:29<1:10:20, 1406.91s/it]

Fold 3 Best F1 Score: 0.8175
New best parameters found with F1: 0.8237

Testing combination: {'model_name': 'efficientnet_V2', 'learning_rate': 0.0001, 'batch_size': 16, 'weight_decay': 0.0001, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0.2}
Fold 1 Best F1 Score: 0.8356
Fold 2 Best F1 Score: 0.8288


 88%|████████▊ | 14/16 [4:10:09<48:14, 1447.05s/it]  

Fold 3 Best F1 Score: 0.8284
New best parameters found with F1: 0.8309

Testing combination: {'model_name': 'efficientnet_V2', 'learning_rate': 0.0001, 'batch_size': 16, 'weight_decay': 0, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0}
Fold 1 Best F1 Score: 0.8275
Fold 2 Best F1 Score: 0.8191


 94%|█████████▍| 15/16 [4:35:42<24:33, 1473.03s/it]

Fold 3 Best F1 Score: 0.8245

Testing combination: {'model_name': 'efficientnet_V2', 'learning_rate': 0.0001, 'batch_size': 16, 'weight_decay': 0, 'optimizer': 'adamw', 'scheduler': True, 'dropout_rate': 0.2}
Fold 1 Best F1 Score: 0.8382
Fold 2 Best F1 Score: 0.8279
Fold 3 Best F1 Score: 0.8218


100%|██████████| 16/16 [5:01:16<00:00, 1129.80s/it]



GridSearch completed!


In [9]:
# File written by gridSearch that holds the selected hyperparameters.
BEST_PARAMS = 'gridsearch_setup1_2025-05-06T20:48:47.915540.pth'

# 2. Train final model with best parameters
best_params = torch.load(os.path.join(MODEL_SAVE_DIR, BEST_PARAMS))
best_model, best_history, best_metrics, best_camImages = bestTrainModel(f"{DATA_DIR}/{DATASET}", best_params)
speciesModel = best_model.species if hasattr(best_model, 'species') else species

# 3. Generate confusion matrix
cm = best_metrics['confusion_matrix']

# One timestamp for every artefact of this run so the files can be matched.
run_stamp = datetime.now().isoformat()

# 4. Save final model and metrics
# Done before plotting so a plotting failure cannot cost the trained model.
final_model_path = os.path.join(MODEL_SAVE_DIR, f'final_model_{run_stamp}.pth')
torch.save({
    'model_state_dict': best_model.state_dict(),
    'best_params': best_params,
    'metrics': best_metrics,
    'class_names': speciesModel,
    'training_history': best_history
}, final_model_path)

# Save metrics separately (scalars cast to float so the JSON holds plain numbers)
with open(os.path.join(MODEL_SAVE_DIR, f"final_metrics_{run_stamp}.json"), 'w') as f:
    json.dump({
        'test_loss': float(best_metrics['test_loss']),
        'top1_accuracy': float(best_metrics['top1_accuracy']),
        'top3_accuracy': float(best_metrics['top3_accuracy']),
        'f1_score': float(best_metrics['f1_score']),
        'macro_auprc': float(best_metrics['macro_auprc']),
        'precision_recall_curve': {
            'precision': best_metrics['precision'],
            'recall': best_metrics['recall']
        }
    }, f, indent=2)

# Save confusion matrix
np.save(os.path.join(MODEL_SAVE_DIR, f"confusion_matrix_{run_stamp}.npy"), cm)

# 5. Plot results
plotting(
    history=best_history,
    cm=cm,
    metrics_dict=best_metrics,
    species=speciesModel,
    cam_images=best_camImages
)