In [1]:
import os
import numpy as np
import h5py
from PIL import Image
import matplotlib.pyplot as plt
import itertools
from tqdm import tqdm
import seaborn as sns

# Scikit-learn imports
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score

# PyTorch imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import models, transforms

First, the list of chosen bird species is defined:

In [2]:
species = [
    'Ciconia_ciconia', 'Columba_livia', 'Streptopelia_decaocto',
    'Emberiza_calandra', 'Carduelis_carduelis', 'Serinus_serinus',
    'Delichon_urbicum', 'Hirundo_rustica', 'Passer_domesticus',
    'Sturnus_unicolor', 'Turdus_merula'
]

And some settings are defined for pre-processing the images.

In [3]:
DATA_DIR = 'dataset'                        # Replace with your dataset path
MODEL_SAVE_DIR = 'saved_models'
OUTPUT_FILE = 'bird_dataset_pytorch.h5'     # Output HDF5 file
IMG_SIZE = (224, 224)                       # Standard size for CNNs
TEST_SIZE = 0.1                             # Test set proportion
COMPRESSION = 'gzip'                        # Compression type
COMPRESSION_LEVEL = 7                       # Compression level (1-9)
N_SPLITS = 5                                # Number of splits for cross-validation
BATCH_SIZE = 32                             # Batch size for DataLoader

The images of the various birds must be transformed so that they can be used in the models, using PyTorch's transforms.Compose(). The transformations include data augmentation for the training set and basic preprocessing for the test set. But what is data augmentation?

Data Augmentation is a technique used to expand a training dataset by creating modified versions of existing images through random but realistic transformations. It helps improve model generalization by exposing it to varied examples without collecting new data. Common transformations include flipping, rotating, scaling, changing brightness/contrast, adding noise, or cropping. These variations simulate different real-world scenarios, making the model more robust to changes in viewpoint, lighting, or orientation.

Data augmentation is applied only during training—validation and test data remain unmodified to reflect real-world performance. It is especially useful for small datasets, reducing overfitting and improving accuracy. 

Let's now break down each component and explain the hyperparameters:

- transforms.Resize(IMG_SIZE)- Resizes the image to a fixed size. This size is typically chosen based on model architecture, in this case 224x224.

- transforms.RandomHorizontalFlip()- Randomly flips the image horizontally with a default probability of 0.5.

- transforms.RandomRotation(20)- Rotates the image randomly by up to ±20 degrees.

- transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1)- Randomly adjusts brightness, contrast, and saturation by up to ±10%.

- transforms.ToTensor()- Converts the image to a PyTorch tensor (values scaled to [0, 1]).

- transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])- Normalizes the image using precomputed mean and std from ImageNet.




In [4]:
train_transforms = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transforms = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [5]:
def createDataset():
    images = []
    labels = []
        
    for idx, specie in enumerate(species):
        specie_dir = os.path.join(DATA_DIR, specie)
            
        for img_name in os.listdir(specie_dir):
            img_path = os.path.join(specie_dir, img_name)
                
            try:
                img = Image.open(img_path)
                if img.mode != 'RGB':
                    img = img.convert('RGB')
                img = img.resize(IMG_SIZE)
                images.append(np.array(img))
                labels.append(idx)
            except Exception as e:
                print(f"Error processing {img_path}: {e}")

    # Load and preprocess images
    print("Loading and preprocessing images...")
    X = np.array(images)
    y = np.array(labels)

    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, stratify=y
    )

    #Cross Validation with Stratified K-Folds
    cv = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

    #Saving in HDF5 format
    print(f"Saving data to {OUTPUT_FILE}...")
    os.makedirs(MODEL_SAVE_DIR, exist_ok=True)
    with h5py.File(f"{MODEL_SAVE_DIR}/{OUTPUT_FILE}", 'w') as hf:
        #Test set
        test_group = hf.create_group('test')
        test_group.create_dataset('X_test', data=X_test, compression=COMPRESSION, compression_opts=COMPRESSION_LEVEL)
        test_group.create_dataset('y_test', data=y_test, compression=COMPRESSION, compression_opts=COMPRESSION_LEVEL)

        # Cross-validation splits
        cv_group = hf.create_group('cross_validation')
        for fold, (train_idx, val_idx) in enumerate(cv.split(X_train, y_train)):
            fold_group = cv_group.create_group(f'fold_{fold + 1}')
            fold_group.create_dataset('X_train', data=X_train[train_idx], compression=COMPRESSION, compression_opts=COMPRESSION_LEVEL)
            fold_group.create_dataset('y_train', data=y_train[train_idx], compression=COMPRESSION, compression_opts=COMPRESSION_LEVEL)
            fold_group.create_dataset('X_val', data=X_train[val_idx], compression=COMPRESSION, compression_opts=COMPRESSION_LEVEL)
            fold_group.create_dataset('y_val', data=y_train[val_idx], compression=COMPRESSION, compression_opts=COMPRESSION_LEVEL)

        # Save metadata
        hf.attrs['species'] = np.array(species, dtype=h5py.string_dtype())
        hf.attrs['image_size'] = IMG_SIZE
        hf.attrs['n_splits'] = N_SPLITS
        hf.attrs['compression'] = COMPRESSION
        hf.attrs['compression_level'] = COMPRESSION_LEVEL
    print("Process completed successfully!")
    print(f"Data saved to {OUTPUT_FILE} with {COMPRESSION} compression level {COMPRESSION_LEVEL}")


In [6]:
def getDataloaders(filepath, idx, batch_size=BATCH_SIZE):
    with h5py.File(filepath, 'r') as hf:
        X_test = hf['test/X_test'][:]
        y_test = hf['test/y_test'][:]

        fold_group = hf[f'cross_validation/fold_{idx}']
        X_train = fold_group['X_train'][:]
        y_train = fold_group['y_train'][:]
        X_val = fold_group['X_val'][:]
        y_val = fold_group['y_val'][:]

        species = hf.attrs['species']
    
    # Apply transformations
    X_train = np.array([train_transforms(Image.fromarray(img)) for img in X_train])
    X_val = np.array([test_transforms(Image.fromarray(img)) for img in X_val])
    X_test = np.array([test_transforms(Image.fromarray(img)) for img in X_test])


    #Permute necessary for PyTorch (C, H, W) 
    X_train_tensor = torch.from_numpy(X_train).float()
    y_train_tensor = torch.from_numpy(y_train).long()
    X_val_tensor = torch.from_numpy(X_val).float()
    y_val_tensor = torch.from_numpy(y_val).long()
    X_test_tensor = torch.from_numpy(X_test).float()
    y_test_tensor = torch.from_numpy(y_test).long()

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    return train_loader, val_loader, test_loader, species

In [7]:
#createDataset()

In [8]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
NUM_EPOCHS = 30
NUM_CLASSES = 11
#HYPERPARAMS = {
#    'model_name': ['resnet18', 'efficientnet_b0', 'resnet34'],
#    'learning_rate': [0.001, 0.0005, 0.0001],
#    'batch_size': [BATCH_SIZE],
#    'weight_decay': [0, 0.001, 0.0001],
#    'optimizer': ['adam', 'sgd'],
#    'scheduler': [True, False],
#    'dropout_rate': [0, 0.2, 0.5]
#}

HYPERPARAMS = {
    'model_name': ['resnet18', 'efficientnet_b0'],
    'learning_rate': [0.001, 0.0005],
    'batch_size': [BATCH_SIZE],
    'weight_decay': [0, 0.001],
    'optimizer': ['adam'],
    'scheduler': [True, False],
    'dropout_rate': [0, 0.5]
}

os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

def getModel(name, nClasses, dropout_rate=0):
    if name == 'resnet18':
        model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        model.fc = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(model.fc.in_features, nClasses)
        )
    elif name == 'efficientnet_b0':
        model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)
        model.classifier[1] = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(model.classifier[1].in_features, nClasses)
        )
    elif name == 'resnet34':
        model = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)
        model.fc = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(model.fc.in_features, nClasses)
        )
    return model.to(DEVICE)

def getOptimizer(model, params):
    if params['optimizer'] == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'], weight_decay=params['weight_decay'])
    elif params['optimizer'] == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=params['learning_rate'], momentum=0.9, weight_decay=params['weight_decay'])
    return optimizer

def trainModel(model, train_loader, val_loader, params):
    criterion = nn.CrossEntropyLoss()
    optimizer = getOptimizer(model, params)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3) if params['scheduler'] else None
    
    best_f1 = 0
    THRESHOLD = 5
    improvementCounter = 0
    history = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': [], 'val_f1': []}
    
    for epoch in range(NUM_EPOCHS):
        print(f'Epoch {epoch+1}/{NUM_EPOCHS}')
        print('-' * 10)

        # Training phase
        model.train()
        running_loss = 0.0
        running_corrects = 0
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
        
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = running_corrects.double() / len(train_loader.dataset)
        history['train_loss'].append(epoch_loss)
        history['train_acc'].append(epoch_acc)
        
        # Validation phase
        model.eval()
        val_running_loss = 0.0
        val_running_corrects = 0
        all_preds, all_labels = [], []
        
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                _, preds = torch.max(outputs, 1)
                
                val_running_loss += loss.item() * inputs.size(0)
                val_running_corrects += torch.sum(preds == labels.data)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        val_loss = val_running_loss / len(val_loader.dataset)
        val_acc = val_running_corrects.double() / len(val_loader.dataset)
        val_f1 = f1_score(all_labels, all_preds, average='weighted')
        
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['val_f1'].append(val_f1)
        
        if scheduler:
            scheduler.step(val_loss)
        
        if val_f1 > best_f1:
            best_f1 = val_f1
            improvementCounter = 0
            torch.save(model.state_dict(), os.path.join(MODEL_SAVE_DIR, f'best_{params["model_name"]}_fold.pth'))
            print(f'[GridSearch]: New Best F1 Score: {best_f1:.4f}')
        else:
            improvementCounter +=1
            if improvementCounter >= THRESHOLD:
                print(f'[GridSearch]: Stopping at {epoch+1} there\'s no more improvement.')
                break

    return history, best_f1

In [9]:
def gridSearch(filepath, n_splits=N_SPLITS):
    best_params = None
    best_f1 = 0
    
    # Generate all possible hyperparameter combinations
    keys, values = zip(*HYPERPARAMS.items())
    param_combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]
    
    print(f"\nBeginning GridSearch with {len(param_combinations)} combinations...")
    
    for params in tqdm(param_combinations):
        print()
        print("="*50)
        print(f"Testing combination: {params}")
        fold_f1_scores = []
        
        # Cross-validation loop
        for fold_idx in range(1, n_splits+1):
            train_loader, val_loader, _, species = getDataloaders(filepath, fold_idx, params['batch_size'])
            print(f"Validating {species[fold_idx]}")
            model = getModel(params['model_name'], NUM_CLASSES)
            _, fold_f1 = trainModel(model, train_loader, val_loader, params)
            fold_f1_scores.append(fold_f1)
            
            # Clear memory
            del model
            torch.cuda.empty_cache()
        
        # Calculate average F1 across folds
        avg_f1 = np.mean(fold_f1_scores)
        print(f"Average F1 across folds: {avg_f1:.4f}")
        
        # Update best parameters if improved
        if avg_f1 > best_f1:
            best_f1 = avg_f1
            best_params = params
            print(f"New best parameters found with F1: {best_f1:.4f}")
    
    print()
    print("\GridSearch completed!")
    print(f"Best parameters: {best_params}")
    print(f"Best average F1 score: {best_f1:.4f}")
    
    return best_params

def bestTrainModel(filepath, best_params):
    train_loader, val_loader, test_loader, _ = getDataloaders(filepath, 1, best_params['batch_size'])
    trainset = torch.utils.data.ConcatDataset([train_loader.dataset, val_loader.dataset])
    trainloaderset = DataLoader(trainset, batch_size=best_params['batch_size'], shuffle=True)
    model = getModel(best_params['model_name'], NUM_CLASSES, best_params['dropout_rate'])
    bestHist,_ = trainModel(model, trainloaderset, test_loader, best_params)

    #Evaluate on test set
    model.eval()
    all_preds, all_labels = [], []
    test_loss = 0.0

    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            inputs = inputs.permute(0, 3, 1, 2)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)
            
            test_loss += loss.item() * inputs.size(0)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    test_loss /= len(test_loader.dataset)
    test_acc = accuracy_score(all_labels, all_preds)
    test_f1 = f1_score(all_labels, all_preds, average='weighted')
    print("\nFinal Model Evaluation:")
    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {test_acc:.4f}")
    print(f"Test F1 Score: {test_f1:.4f}")

    return model, bestHist, (all_labels, all_preds), test_loss, test_acc, test_f1

    

def plotting(history, cm, test_metrics, species):
    """Visualize training results and metrics"""
    plt.figure(figsize=(20, 12))
    
    # Plot training history
    plt.subplot(2, 2, 1)
    plt.plot(history['train_loss'], label='Train Loss')
    plt.plot(history['val_loss'], label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    
    plt.subplot(2, 2, 2)
    plt.plot(history['train_acc'], label='Train Accuracy')
    plt.plot(history['val_acc'], label='Validation Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    
    plt.subplot(2, 2, 3)
    plt.plot(history['val_f1'], label='Validation F1')
    plt.title('Validation F1 Score')
    plt.xlabel('Epoch')
    plt.ylabel('F1 Score')
    plt.legend()
    
    # Plot confusion matrix
    plt.subplot(2, 2, 4)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                xticklabels=species, yticklabels=species)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.xticks(rotation=45)
    plt.yticks(rotation=0)
    
    plt.tight_layout()
    plt.show()
    
    # Print test metrics
    print("\nTest Set Metrics:")
    print(f"Loss: {test_metrics[0]:.4f}")
    print(f"Accuracy: {test_metrics[1]:.4f}")
    print(f"F1 Score: {test_metrics[2]:.4f}")

  print("\GridSearch completed!")


In [None]:
# Main execution
# 1. Perform hyperparameter search
best_params = gridSearch(f"{MODEL_SAVE_DIR}/{OUTPUT_FILE}", N_SPLITS)  


Beginning GridSearch with 32 combinations...


  0%|          | 0/32 [00:00<?, ?it/s]


Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.001, 'batch_size': 32, 'weight_decay': 0, 'optimizer': 'adam', 'scheduler': True, 'dropout_rate': 0}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.3745
Epoch 2/30
----------
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.4706
Epoch 4/30
----------
[GridSearch]: New Best F1 Score: 0.5744
Epoch 5/30
----------
Epoch 6/30
----------
Epoch 7/30
----------
Epoch 8/30
----------
Epoch 9/30
----------
[GridSearch]: New Best F1 Score: 0.6283
Epoch 10/30
----------
[GridSearch]: New Best F1 Score: 0.6357
Epoch 11/30
----------
[GridSearch]: New Best F1 Score: 0.6422
Epoch 12/30
----------
[GridSearch]: New Best F1 Score: 0.6466
Epoch 13/30
----------
[GridSearch]: New Best F1 Score: 0.6586
Epoch 14/30
----------
Epoch 15/30
----------
Epoch 16/30
----------
Epoch 17/30
----------
Epoch 18/30
----------
[GridSearch]: Stopping at 18 there's no more improvement.
Validating Streptopelia

  3%|▎         | 1/32 [44:10<22:49:36, 2650.86s/it]

[GridSearch]: Stopping at 23 there's no more improvement.
Average F1 across folds: 0.6598
New best parameters found with F1: 0.6598

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.001, 'batch_size': 32, 'weight_decay': 0, 'optimizer': 'adam', 'scheduler': True, 'dropout_rate': 0.5}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.2820
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.4577
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.5777
Epoch 4/30
----------
Epoch 5/30
----------
Epoch 6/30
----------
Epoch 7/30
----------
Epoch 8/30
----------
[GridSearch]: New Best F1 Score: 0.6490
Epoch 9/30
----------
Epoch 10/30
----------
[GridSearch]: New Best F1 Score: 0.6646
Epoch 11/30
----------
Epoch 12/30
----------
Epoch 13/30
----------
Epoch 14/30
----------
Epoch 15/30
----------
[GridSearch]: Stopping at 15 there's no more improvement.
Validating Streptopelia_decaocto
Epoch 1/30
----------
[GridSearch]: New Best 

  6%|▋         | 2/32 [1:22:55<20:29:35, 2459.19s/it]

[GridSearch]: Stopping at 19 there's no more improvement.
Average F1 across folds: 0.6657
New best parameters found with F1: 0.6657

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.001, 'batch_size': 32, 'weight_decay': 0, 'optimizer': 'adam', 'scheduler': False, 'dropout_rate': 0}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.5245
Epoch 2/30
----------
Epoch 3/30
----------
Epoch 4/30
----------
[GridSearch]: New Best F1 Score: 0.5629
Epoch 5/30
----------
Epoch 6/30
----------
Epoch 7/30
----------
Epoch 8/30
----------
[GridSearch]: New Best F1 Score: 0.5727
Epoch 9/30
----------
Epoch 10/30
----------
Epoch 11/30
----------
Epoch 12/30
----------
Epoch 13/30
----------
[GridSearch]: Stopping at 13 there's no more improvement.
Validating Streptopelia_decaocto
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.4044
Epoch 2/30
----------
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.5548
Epoch 4/30
----------
Epoc

  9%|▉         | 3/32 [1:56:58<18:16:36, 2268.86s/it]

[GridSearch]: Stopping at 13 there's no more improvement.
Average F1 across folds: 0.5907

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.001, 'batch_size': 32, 'weight_decay': 0, 'optimizer': 'adam', 'scheduler': False, 'dropout_rate': 0.5}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.3973
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.4775
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.5199
Epoch 4/30
----------
Epoch 5/30
----------
Epoch 6/30
----------
[GridSearch]: New Best F1 Score: 0.5494
Epoch 7/30
----------
Epoch 8/30
----------
Epoch 9/30
----------
Epoch 10/30
----------
Epoch 11/30
----------
[GridSearch]: Stopping at 11 there's no more improvement.
Validating Streptopelia_decaocto
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.4535
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.5173
Epoch 3/30
----------
Epoch 4/30
----------
[GridSearch]: New Best F1 Score: 0.5431
Epoch 5/30

 12%|█▎        | 4/32 [2:27:10<16:14:37, 2088.48s/it]

[GridSearch]: Stopping at 20 there's no more improvement.
Average F1 across folds: 0.5796

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.001, 'batch_size': 32, 'weight_decay': 0.001, 'optimizer': 'adam', 'scheduler': True, 'dropout_rate': 0}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.3188
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.5044
Epoch 3/30
----------
Epoch 4/30
----------
Epoch 5/30
----------
Epoch 6/30
----------
[GridSearch]: New Best F1 Score: 0.5579
Epoch 7/30
----------
[GridSearch]: New Best F1 Score: 0.6497
Epoch 8/30
----------
[GridSearch]: New Best F1 Score: 0.6616
Epoch 9/30
----------
Epoch 10/30
----------
Epoch 11/30
----------
Epoch 12/30
----------
Epoch 13/30
----------
[GridSearch]: Stopping at 13 there's no more improvement.
Validating Streptopelia_decaocto
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.4331
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.5253
Epoch 3

 16%|█▌        | 5/32 [2:54:20<14:25:28, 1923.26s/it]

[GridSearch]: Stopping at 11 there's no more improvement.
Average F1 across folds: 0.6274

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.001, 'batch_size': 32, 'weight_decay': 0.001, 'optimizer': 'adam', 'scheduler': True, 'dropout_rate': 0.5}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.2472
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.4928
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.5266
Epoch 4/30
----------
[GridSearch]: New Best F1 Score: 0.5495
Epoch 5/30
----------
Epoch 6/30
----------
Epoch 7/30
----------
Epoch 8/30
----------
Epoch 9/30
----------
[GridSearch]: New Best F1 Score: 0.6430
Epoch 10/30
----------
[GridSearch]: New Best F1 Score: 0.6443
Epoch 11/30
----------
[GridSearch]: New Best F1 Score: 0.6507
Epoch 12/30
----------
Epoch 13/30
----------
Epoch 14/30
----------
Epoch 15/30
----------
[GridSearch]: New Best F1 Score: 0.6582
Epoch 16/30
----------
Epoch 17/30
----------
[GridSea

 19%|█▉        | 6/32 [3:33:47<14:58:43, 2074.00s/it]

[GridSearch]: Stopping at 15 there's no more improvement.
Average F1 across folds: 0.6539

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.001, 'batch_size': 32, 'weight_decay': 0.001, 'optimizer': 'adam', 'scheduler': False, 'dropout_rate': 0}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.2395
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.4137
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.4246
Epoch 4/30
----------
[GridSearch]: New Best F1 Score: 0.4632
Epoch 5/30
----------
[GridSearch]: New Best F1 Score: 0.4799
Epoch 6/30
----------
[GridSearch]: New Best F1 Score: 0.4953
Epoch 7/30
----------
Epoch 8/30
----------
Epoch 9/30
----------
[GridSearch]: New Best F1 Score: 0.5347
Epoch 10/30
----------
Epoch 11/30
----------
Epoch 12/30
----------
Epoch 13/30
----------
Epoch 14/30
----------
[GridSearch]: Stopping at 14 there's no more improvement.
Validating Streptopelia_decaocto
Epoch 1/30
----------
[Grid

 22%|██▏       | 7/32 [4:07:16<14:15:25, 2053.03s/it]

[GridSearch]: Stopping at 10 there's no more improvement.
Average F1 across folds: 0.5507

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.001, 'batch_size': 32, 'weight_decay': 0.001, 'optimizer': 'adam', 'scheduler': False, 'dropout_rate': 0.5}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.3560
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.4625
Epoch 3/30
----------
Epoch 4/30
----------
Epoch 5/30
----------
[GridSearch]: New Best F1 Score: 0.4791
Epoch 6/30
----------
Epoch 7/30
----------
[GridSearch]: New Best F1 Score: 0.4968
Epoch 8/30
----------
Epoch 9/30
----------
Epoch 10/30
----------
Epoch 11/30
----------
[GridSearch]: New Best F1 Score: 0.5263
Epoch 12/30
----------
Epoch 13/30
----------
Epoch 14/30
----------
Epoch 15/30
----------
Epoch 16/30
----------
[GridSearch]: New Best F1 Score: 0.5384
Epoch 17/30
----------
Epoch 18/30
----------
Epoch 19/30
----------
Epoch 20/30
----------
[GridSearch]: New 

 25%|██▌       | 8/32 [4:34:03<12:44:17, 1910.73s/it]

[GridSearch]: Stopping at 11 there's no more improvement.
Average F1 across folds: 0.5450

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.0005, 'batch_size': 32, 'weight_decay': 0, 'optimizer': 'adam', 'scheduler': True, 'dropout_rate': 0}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.5556
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.6177
Epoch 3/30
----------
Epoch 4/30
----------
Epoch 5/30
----------
Epoch 6/30
----------
[GridSearch]: New Best F1 Score: 0.6288
Epoch 7/30
----------
[GridSearch]: New Best F1 Score: 0.7352
Epoch 8/30
----------
Epoch 9/30
----------
[GridSearch]: New Best F1 Score: 0.7445
Epoch 10/30
----------
Epoch 11/30
----------
Epoch 12/30
----------
Epoch 13/30
----------
Epoch 14/30
----------
[GridSearch]: Stopping at 14 there's no more improvement.
Validating Streptopelia_decaocto
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.5486
Epoch 2/30
----------
Epoch 3/30
----------
[Grid

 28%|██▊       | 9/32 [5:14:29<13:14:17, 2072.07s/it]

[GridSearch]: Stopping at 24 there's no more improvement.
Average F1 across folds: 0.7360
New best parameters found with F1: 0.7360

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.0005, 'batch_size': 32, 'weight_decay': 0, 'optimizer': 'adam', 'scheduler': True, 'dropout_rate': 0.5}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.5834
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.5967
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.6286
Epoch 4/30
----------
Epoch 5/30
----------
Epoch 6/30
----------
[GridSearch]: New Best F1 Score: 0.7070
Epoch 7/30
----------
[GridSearch]: New Best F1 Score: 0.7125
Epoch 8/30
----------
[GridSearch]: New Best F1 Score: 0.7193
Epoch 9/30
----------
[GridSearch]: New Best F1 Score: 0.7210
Epoch 10/30
----------
Epoch 11/30
----------
[GridSearch]: New Best F1 Score: 0.7254
Epoch 12/30
----------
Epoch 13/30
----------
Epoch 14/30
----------
Epoch 15/30
----------
[GridSearch]: N

 31%|███▏      | 10/32 [5:50:59<12:53:05, 2108.44s/it]

[GridSearch]: Stopping at 14 there's no more improvement.
Average F1 across folds: 0.7237

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.0005, 'batch_size': 32, 'weight_decay': 0, 'optimizer': 'adam', 'scheduler': False, 'dropout_rate': 0}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.6297
Epoch 2/30
----------
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.6474
Epoch 4/30
----------
Epoch 5/30
----------
Epoch 6/30
----------
Epoch 7/30
----------
Epoch 8/30
----------
[GridSearch]: Stopping at 8 there's no more improvement.
Validating Streptopelia_decaocto
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.4137
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.4802
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.5848
Epoch 4/30
----------
[GridSearch]: New Best F1 Score: 0.6276
Epoch 5/30
----------
Epoch 6/30
----------
[GridSearch]: New Best F1 Score: 0.6367
Epoch 7/30
----------
Epoch 8/30
---

 34%|███▍      | 11/32 [6:17:39<11:23:30, 1952.87s/it]

[GridSearch]: Stopping at 10 there's no more improvement.
Average F1 across folds: 0.6538

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.0005, 'batch_size': 32, 'weight_decay': 0, 'optimizer': 'adam', 'scheduler': False, 'dropout_rate': 0.5}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.6045
Epoch 2/30
----------
Epoch 3/30
----------
Epoch 4/30
----------
[GridSearch]: New Best F1 Score: 0.6239
Epoch 5/30
----------
Epoch 6/30
----------
Epoch 7/30
----------
[GridSearch]: New Best F1 Score: 0.6442
Epoch 8/30
----------
[GridSearch]: New Best F1 Score: 0.6606
Epoch 9/30
----------
[GridSearch]: New Best F1 Score: 0.6740
Epoch 10/30
----------
Epoch 11/30
----------
Epoch 12/30
----------
Epoch 13/30
----------
Epoch 14/30
----------
[GridSearch]: Stopping at 14 there's no more improvement.
Validating Streptopelia_decaocto
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.5349
Epoch 2/30
----------
[GridSearch]: New Best F

 38%|███▊      | 12/32 [6:41:58<10:00:51, 1802.56s/it]

[GridSearch]: Stopping at 12 there's no more improvement.
Average F1 across folds: 0.6542

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.0005, 'batch_size': 32, 'weight_decay': 0.001, 'optimizer': 'adam', 'scheduler': True, 'dropout_rate': 0}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.5465
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.5741
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.6373
Epoch 4/30
----------
Epoch 5/30
----------
Epoch 6/30
----------
Epoch 7/30
----------
Epoch 8/30
----------
[GridSearch]: New Best F1 Score: 0.7115
Epoch 9/30
----------
[GridSearch]: New Best F1 Score: 0.7178
Epoch 10/30
----------
[GridSearch]: New Best F1 Score: 0.7209
Epoch 11/30
----------
Epoch 12/30
----------
Epoch 13/30
----------
[GridSearch]: New Best F1 Score: 0.7221
Epoch 14/30
----------
[GridSearch]: New Best F1 Score: 0.7265
Epoch 15/30
----------
[GridSearch]: New Best F1 Score: 0.7269
Epoch 16/30
---

 41%|████      | 13/32 [7:24:12<10:40:56, 2024.02s/it]

[GridSearch]: Stopping at 21 there's no more improvement.
Average F1 across folds: 0.7262

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.0005, 'batch_size': 32, 'weight_decay': 0.001, 'optimizer': 'adam', 'scheduler': True, 'dropout_rate': 0.5}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.5827
Epoch 2/30
----------
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.5985
Epoch 4/30
----------
Epoch 5/30
----------
[GridSearch]: New Best F1 Score: 0.6268
Epoch 6/30
----------
[GridSearch]: New Best F1 Score: 0.7161
Epoch 7/30
----------
[GridSearch]: New Best F1 Score: 0.7290
Epoch 8/30
----------
[GridSearch]: New Best F1 Score: 0.7506
Epoch 9/30
----------
Epoch 10/30
----------
Epoch 11/30
----------
Epoch 12/30
----------
Epoch 13/30
----------
[GridSearch]: Stopping at 13 there's no more improvement.
Validating Streptopelia_decaocto
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.5759
Epoch 2/30
----------
[Gri

 44%|████▍     | 14/32 [8:06:50<10:55:34, 2185.24s/it]

[GridSearch]: Stopping at 17 there's no more improvement.
Average F1 across folds: 0.7209

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.0005, 'batch_size': 32, 'weight_decay': 0.001, 'optimizer': 'adam', 'scheduler': False, 'dropout_rate': 0}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.5723
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.5910
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.5942
Epoch 4/30
----------
Epoch 5/30
----------
Epoch 6/30
----------
[GridSearch]: New Best F1 Score: 0.5996
Epoch 7/30
----------
Epoch 8/30
----------
[GridSearch]: New Best F1 Score: 0.6112
Epoch 9/30
----------
Epoch 10/30
----------
Epoch 11/30
----------
[GridSearch]: New Best F1 Score: 0.6226
Epoch 12/30
----------
[GridSearch]: New Best F1 Score: 0.6276
Epoch 13/30
----------
Epoch 14/30
----------
Epoch 15/30
----------
Epoch 16/30
----------
Epoch 17/30
----------
[GridSearch]: Stopping at 17 there's no more imp

 47%|████▋     | 15/32 [8:32:53<9:26:02, 1997.77s/it] 

[GridSearch]: Stopping at 10 there's no more improvement.
Average F1 across folds: 0.6293

Testing combination: {'model_name': 'resnet18', 'learning_rate': 0.0005, 'batch_size': 32, 'weight_decay': 0.001, 'optimizer': 'adam', 'scheduler': False, 'dropout_rate': 0.5}
Validating Columba_livia
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.4979
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.5528
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.6069
Epoch 4/30
----------
Epoch 5/30
----------
Epoch 6/30
----------
Epoch 7/30
----------
[GridSearch]: New Best F1 Score: 0.6583
Epoch 8/30
----------
Epoch 9/30
----------
Epoch 10/30
----------
Epoch 11/30
----------
Epoch 12/30
----------
[GridSearch]: Stopping at 12 there's no more improvement.
Validating Streptopelia_decaocto
Epoch 1/30
----------
[GridSearch]: New Best F1 Score: 0.5117
Epoch 2/30
----------
[GridSearch]: New Best F1 Score: 0.5677
Epoch 3/30
----------
[GridSearch]: New Best F1 Score: 0.5816
Epoc

In [None]:
# 2. Train final model with best parameters
final_model, history, (true_labels, pred_labels), test_loss, test_acc, test_f1 = bestTrainModel(OUTPUT_FILE, best_params)
    
# 3. Generate confusion matrix
cm = confusion_matrix(true_labels, pred_labels)
    
# 4. Plot results
plotting(history, cm, (test_loss, test_acc, test_f1), final_model.species if hasattr(final_model, 'species') else species)
    
# 5. Save final model
torch.save({
    'model_state_dict': final_model.state_dict(),
    'best_params': best_params,
    'test_metrics': (test_loss, test_acc, test_f1)
}, os.path.join(MODEL_SAVE_DIR, 'final_model.pth'))
print("\nFinal model saved!")