*KTH Royal Institute of Technology* \
DD2424 Deep Learning in Data Science | Project (grade E part)\
Diogo Paulo 030224-8216 (diogop@kth.se)\
Hugo Dezerto 20011224-8257 (hugoad@kth.se) \
Maria Sebastião 031010-T207 (mcms2@kth.se)

# Grade E

In [20]:
# Just to check if the current working directory is correct

import os
os.getcwd()

'/home/jupyter/grade_E'

In [21]:
import torch
import torchvision.transforms as transforms
from torchvision.datasets import OxfordIIITPet
from torchvision.models import resnet18
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import time

from torch.utils.data import random_split, Dataset

from collections import defaultdict
import random
import numpy as np

# Set device. Use GPU if available else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


## Binary classification

In [26]:
# --------------------- BINARY CLASSIFICATION ---------------------

# Transform: Resize, normalize (ImageNet mean/std)
transform = transforms.Compose([
    transforms.Resize(224),        # Resize shortest side to 224, keep aspect ratio
    transforms.CenterCrop(224),    # Crop from the center to 224x224
    transforms.ToTensor(), # Convert to tensor
    # Normalize with ImageNet mean and std (check https://pytorch.org/hub/pytorch_vision_resnet/)
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load dataset as binary classification problem (cat vs dog). The resulting object is a list of tuples (image, label)
train_dataset = OxfordIIITPet(root='./dataset', split='trainval', target_types='binary-category', transform=transform, download=True)
test_dataset = OxfordIIITPet(root='./dataset', split='test', target_types='binary-category', transform=transform, download=True)
print("Dataset loaded. Number of training samples:", len(train_dataset), "Number of test samples:", len(test_dataset))

# Create DataLoader objects to efficiently load data in batches.
# - train_loader: loads training data in batches of 32 and shuffles the data each epoch (improves generalization).
# - test_loader: loads test data in batches of 32 without shuffling (for consistent evaluation).
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# Load a ResNet18 model pre-trained on ImageNet
model = resnet18(weights='IMAGENET1K_V1')
print("ResNet18 model loaded.")

# Freeze all the parameters in the pre-trained model
for param in model.parameters():
    param.requires_grad = False

# Replace the final fully connected layer to output 2 classes (cat vs dog)
model.fc = nn.Linear(model.fc.in_features, 2)
# Move the model to the selected device (GPU if available, else CPU)
model = model.to(device)

# Loss & optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.002) # TUNE

# Training loop
def train_model(num_epochs):
    model.train()  # Set model to training mode (enables dropout, batchnorm updates)
    for epoch in range(num_epochs):
        running_loss = 0  # Accumulate loss for this epoch
        for imgs, labels in train_loader:  # Loop over each batch in the training data
            imgs, labels = imgs.to(device), labels.to(device)  # Move data to GPU or CPU

            optimizer.zero_grad()  # Clear previous gradients
            outputs = model(imgs)  # Forward pass: compute model predictions
            loss = criterion(outputs, labels)  # Compute loss between predictions and true labels
            loss.backward()  # Backward pass: compute gradients
            optimizer.step()  # Update model parameters

            running_loss += loss.item()  # Add batch loss to epoch total
        # Print average loss for this epoch
        print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader):.4f}")

# Evaluation
def test_model():
    model.eval()  # Set model to evaluation mode (disables dropout, uses running stats for batchnorm)
    correct = total = 0
    with torch.no_grad():  # Disable gradient computation for efficiency during evaluation
        for imgs, labels in test_loader:  # Iterate over the test dataset in batches
            imgs, labels = imgs.to(device), labels.to(device)  # Move data to the appropriate device (CPU or GPU)
            outputs = model(imgs)  # Get model predictions (logits) for the batch. shape: (batch_size, 2)
            _, preds = torch.max(outputs, 1)  # Get the predicted class (index of max logit) for each sample
            correct += (preds == labels).sum().item()  # Count how many predictions are correct in this batch
            total += labels.size(0)  # Update the total number of samples seen so far
    print(f"Test Accuracy: {100 * correct / total:.4f}%")


# Run training and testing
start_time = time.time() # <--- RECORD START TIME
train_model(num_epochs=30) # TUNE
end_time = time.time() # <--- RECORD END TIME
duration = end_time - start_time
print(f"Training time: {duration/60:.2f} minutes")
test_model()

Dataset loaded. Number of training samples: 3680 Number of test samples: 3669
ResNet18 model loaded.
Epoch 1, Loss: 0.1506
Epoch 2, Loss: 0.0655
Epoch 3, Loss: 0.0554
Epoch 4, Loss: 0.0579
Epoch 5, Loss: 0.0461
Epoch 6, Loss: 0.0479
Epoch 7, Loss: 0.0416
Epoch 8, Loss: 0.0423
Epoch 9, Loss: 0.0393
Epoch 10, Loss: 0.0345
Epoch 11, Loss: 0.0442
Epoch 12, Loss: 0.0354
Epoch 13, Loss: 0.0279
Epoch 14, Loss: 0.0283
Epoch 15, Loss: 0.0400
Epoch 16, Loss: 0.0367
Epoch 17, Loss: 0.0330
Epoch 18, Loss: 0.0366
Epoch 19, Loss: 0.0315
Epoch 20, Loss: 0.0311
Epoch 21, Loss: 0.0337
Epoch 22, Loss: 0.0269
Epoch 23, Loss: 0.0294
Epoch 24, Loss: 0.0236
Epoch 25, Loss: 0.0328
Epoch 26, Loss: 0.0243
Epoch 27, Loss: 0.0220
Epoch 28, Loss: 0.0258
Epoch 29, Loss: 0.0194
Epoch 30, Loss: 0.0500
Training time: 11.91 minutes
Test Accuracy: 99.0188%


**Test Accuracy**: 99.0188%

## Multi-class classification

In [22]:
# Define transforms for multi-class
train_transform_multi = transforms.Compose([
    #transforms.Resize(224), # DEFAULT
    #transforms.CenterCrop(224), # DEFAULT
    transforms.RandomResizedCrop(224, scale=(0.75, 1.0)), # Randomly crop the image to 224x224 with a scale of 75% to 100%
    transforms.RandomHorizontalFlip(), # Randomly flip the image horizontally with 50% probability
    #transforms.RandomRotation(15), # Randomly rotate the image by up to +/- 15 degrees
    #transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05), # Augmentation
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transform_multi = transforms.Compose([ # Minimal for test/val
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


# Custom Dataset wrapper to apply a specific transform
class TransformedDataset(Dataset):
    def __init__(self, subset, transform=None):
        self.subset = subset
        self.transform = transform

    def __getitem__(self, index):
        x, y = self.subset[index]
        if self.transform:
            x = self.transform(x)
        return x, y

    def __len__(self):
        return len(self.subset)


def setup_dataloaders(root_dir='./dataset', val_fraction=0.2, batch_size=32, num_workers=2, pin_memory=True,
                      train_transform=None, test_transform=None, imbalanced=False, cat_breed_fraction=0.2,
                      oversample_minority=False):
    """Loads data, splits, and creates DataLoaders."""
    # Load dataset for multi-class breed classification
    base_train_val_dataset = OxfordIIITPet(root=root_dir, split='trainval', target_types='category', download=True)
    test_dataset_multi_raw = OxfordIIITPet(root=root_dir, split='test', target_types='category', download=True)

    # Split training data for validation
    num_train_val_samples = len(base_train_val_dataset)
    num_val_samples = int(val_fraction * num_train_val_samples)
    num_train_samples_for_split = num_train_val_samples - num_val_samples

    # These subsets will contain (PIL Image, label) tuples
    train_subset_raw, val_subset_raw = random_split(base_train_val_dataset, [num_train_samples_for_split, num_val_samples])

     # --- Imbalanced logic for train set ---
    if imbalanced:
        selection_dataset = OxfordIIITPet(root=root_dir, split='trainval', target_types=['category', 'binary-category'], download=True)
        # Create a corresponding subset of the selection dataset
        train_selection_subset = torch.utils.data.Subset(selection_dataset, train_subset_raw.indices)
        imbalanced_indices = get_imbalanced_indices(train_selection_subset, cat_breed_fraction=cat_breed_fraction)
        train_subset_multi = TransformedDataset(torch.utils.data.Subset(train_subset_raw, imbalanced_indices), transform=train_transform)
    else:
        train_subset_multi = TransformedDataset(train_subset_raw, transform=train_transform)

    val_subset_multi = TransformedDataset(val_subset_raw, transform=test_transform)
    test_dataset_multi = TransformedDataset(test_dataset_multi_raw, transform=test_transform)
    
    # --- Over-sampling logic ---
    if oversample_minority:
        # Get all labels from the train subset
        labels = [y for _, y in train_subset_multi]
        class_sample_count = np.array([np.sum(np.array(labels) == t) for t in range(len(set(labels)))])
        class_weights = 1. / class_sample_count
        sample_weights = np.array([class_weights[label] for label in labels])
        sample_weights = torch.from_numpy(sample_weights).float()
        sampler = torch.utils.data.WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)
        train_loader = DataLoader(train_subset_multi, batch_size=batch_size, sampler=sampler, num_workers=num_workers, pin_memory=pin_memory)
    else:
        # shuffle=True shuffles the data each epoch
        train_loader = DataLoader(train_subset_multi, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)

    # Create DataLoader objects for the actual data subsets
    val_loader = DataLoader(val_subset_multi, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)
    test_loader = DataLoader(test_dataset_multi, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)

    print(f"Multi-class Dataset loaded. Training samples: {len(train_subset_multi)}, Validation samples: {len(val_subset_multi)}, Test samples: {len(test_dataset_multi)}")

    return train_loader, val_loader, test_loader


def train_one_epoch(model, train_loader, optimizer, criterion, device, batchnorm_mode="default"):
    """Trains the model for one epoch."""
    model.train()

    # Apply BatchNorm behavior based on the mode
    if batchnorm_mode == "freeze_params":
        for module in model.modules():
            if isinstance(module, nn.BatchNorm2d):
                for param in module.parameters():
                    param.requires_grad = False  # Freeze gamma and beta
    elif batchnorm_mode == "freeze_stats":
        for module in model.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.eval()  # Freeze running_mean and running_var
    elif batchnorm_mode == "default":
        pass  # No need to explicitly set anything; rely on PyTorch's default behavior

    running_loss = 0.0
    for imgs, labels_batch in train_loader:
        imgs, labels_batch = imgs.to(device), labels_batch.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    avg_epoch_loss = running_loss / len(train_loader)
    return avg_epoch_loss


def evaluate_model(model, loader_to_use, criterion, device):
    """Evaluates the model on a given loader."""
    model.eval()
    correct = 0
    total = 0
    running_loss = 0.0
    with torch.no_grad():
        for imgs, labels_batch in loader_to_use:
            imgs, labels_batch = imgs.to(device), labels_batch.to(device)
            outputs = model(imgs)
            if criterion: # Calculate loss if criterion is provided
                loss = criterion(outputs, labels_batch)
                running_loss += loss.item()
            # For multi-class, outputs.shape will be (batch_size, 37)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels_batch).sum().item()
            total += labels_batch.size(0)
    accuracy = 100 * correct / total
    avg_loss = running_loss / len(loader_to_use) if criterion and len(loader_to_use) > 0 else 0.0
    return accuracy, avg_loss



### Strategy 1

In [4]:
def run_fine_tuning_strategy_1(num_epochs, lr_fc, lr_backbone, device,
                               train_loader, val_loader, test_loader,
                               num_classes=37, model_save_prefix="strategy1_best_model",
                               factor=0.1, patience=2, l2_lambda=0.0, batchnorm_mode="default",
                               use_weighted_loss=False):
    """
    Implements Strategy 1: Fine-tune l layers simultaneously with different LRs.
    """
    print("\nStarting Fine-Tuning Strategy 1...")
    start_time = time.time()

    val_accuracies_per_l = {}
    best_overall_val_accuracy = 0.0
    best_l_config_for_strategy = None
    
    max_l = 4 # For ResNet18

    # --- Compute class weights if needed ---
    weights = None
    if use_weighted_loss:
        # Compute class frequencies from the train_loader dataset
        all_labels = []
        for _, labels in train_loader.dataset:
            all_labels.append(labels)
        class_sample_count = np.array([np.sum(np.array(all_labels) == t) for t in range(num_classes)])
        weight = 1. / class_sample_count
        weights = torch.FloatTensor(weight).to(device)
        print(f"Using Weighted Cross-Entropy Loss. Class weights: {weights}")

    for l_val in range(1, max_l + 1):
        print(f"\n    STRATEGY 1: Training with FC + last {l_val} ResNet block(s) unfrozen")

        # Initialize model for current l_val
        current_model = resnet18(weights='IMAGENET1K_V1')
        # Freeze all parameters initially
        for param in current_model.parameters():
            param.requires_grad = False
            # Replace the final fully connected layer (always trainable). model.fc.parameters() are requires_grad=True by default
        current_model.fc = nn.Linear(current_model.fc.in_features, num_classes)
        
        # Unfreeze layers
        current_backbone_params = []
        if l_val >= 1: # Unfreeze layer4
            print("    Unfreezing model.layer4")
            for param in current_model.layer4.parameters():
                param.requires_grad = True
                current_backbone_params.append(param)
        if l_val >= 2: # Unfreeze layer3
            print("    Unfreezing model.layer3")
            for param in current_model.layer3.parameters():
                param.requires_grad = True
                current_backbone_params.append(param)
        if l_val >= 3: # Unfreeze layer2
            print("    Unfreezing model.layer2")
            for param in current_model.layer2.parameters():
                param.requires_grad = True
                current_backbone_params.append(param)
        if l_val >= 4: # Unfreeze layer1
            print("    Unfreezing model.layer1")
            for param in current_model.layer1.parameters():
                param.requires_grad = True
                current_backbone_params.append(param)
        
        current_model = current_model.to(device)
        
        # Optimizer and Criterion for current l_val
        if use_weighted_loss and weights is not None:
            criterion = nn.CrossEntropyLoss(weight=weights)
        else:
            criterion = nn.CrossEntropyLoss()
        
        optimizer_grouped_parameters = [{'params': current_model.fc.parameters(), 'lr': lr_fc}]
        if current_backbone_params:
            optimizer_grouped_parameters.append({'params': current_backbone_params, 'lr': lr_backbone})
        
        current_optimizer = optim.Adam(optimizer_grouped_parameters, weight_decay=l2_lambda)
       
        # --- Initialize ReduceLROnPlateau Scheduler ---
        # mode='max' for accuracy, 'min' for loss.
        # factor: Factor by which the learning rate will be reduced. new_lr = lr * factor.
        # patience: Number of epochs with no improvement after which learning rate will be reduced.
        # verbose=True: Prints a message when the learning rate is reduced.
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(current_optimizer, mode='max', factor=factor, patience=patience) # TUNE

        total_trainable_params_in_current_model = 0
        # Iterate over all parameters of the current_model being used for this l_val
        for param in current_model.parameters():
            if param.requires_grad:
                total_trainable_params_in_current_model += param.numel()
        print(f"    Number of trainable parameters for l={l_val}: {total_trainable_params_in_current_model}")

        print(f"    Starting training for l={l_val}, epochs={num_epochs}...")

        # Initialize a variable to track the previous learning rates
        previous_lrs = None
        for epoch in range(num_epochs):
            avg_train_loss = train_one_epoch(current_model, train_loader, current_optimizer, criterion, device, batchnorm_mode=batchnorm_mode)
            
            # Perform validation within the epoch loop for ReduceLROnPlateau
            epoch_val_accuracy, epoch_val_loss = evaluate_model(current_model, val_loader, criterion, device)
            
            # Step the ReduceLROnPlateau scheduler with the validation accuracy
            scheduler.step(epoch_val_accuracy)
            
            # Get the current learning rates
            current_lrs = [group['lr'] for group in current_optimizer.param_groups]

            # Print epoch details
            print(f"    l={l_val}, Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Val Acc: {epoch_val_accuracy:.2f}%, Val Loss: {epoch_val_loss:.4f}")
            
            # Print the learning rates if they have changed or if this is the first epoch
            if previous_lrs is None or current_lrs != previous_lrs:
                print(f"    Learning rates updated: {current_lrs}")
                previous_lrs = current_lrs  # Update the previous learning rates

        # Perform validation for current l_val
        print(f"    Validating for l={l_val}...")
        current_l_val_accuracy, current_l_val_loss = evaluate_model(current_model, val_loader, criterion, device)
        val_accuracies_per_l[l_val] = current_l_val_accuracy
        print(f"    Validation Accuracy for l={l_val}: {current_l_val_accuracy:.4f}%, Val Loss: {current_l_val_loss:.4f}")

        if current_l_val_accuracy > best_overall_val_accuracy:
            best_overall_val_accuracy = current_l_val_accuracy
            best_l_config_for_strategy = l_val
            # Save the best model's state_dict
            torch.save(current_model.state_dict(), f'{model_save_prefix}_l_{best_l_config_for_strategy}.pth')
            print(f"    Saved new best model (l={best_l_config_for_strategy})")

    end_time = time.time()
    strategy_duration_minutes = (end_time - start_time) / 60
    
    # After the loop, evaluate the best configuration on the test set
    final_test_accuracy_for_strategy = 0.0
    if best_l_config_for_strategy is not None:
        print(f"\n--- Strategy 1 Evaluation ---")
        print(f"Best l based on validation accuracy: {best_l_config_for_strategy} (Val Acc: {val_accuracies_per_l[best_l_config_for_strategy]:.4f}%)")
        print(f"Loading and evaluating best model (l={best_l_config_for_strategy}) on the Test Set...")
        
        # Re-setup the model architecture for the best_l_config
        best_model_for_strategy = resnet18(weights=None) # Initialize without pre-trained weights if loading all
        best_model_for_strategy.fc = nn.Linear(best_model_for_strategy.fc.in_features, num_classes) # Replace the final fully connected layer to match the saved model
        best_model_for_strategy.load_state_dict(torch.load(f'{model_save_prefix}_l_{best_l_config_for_strategy}.pth', weights_only=True)) # Load the saved state dictionary for the best model
        best_model_for_strategy = best_model_for_strategy.to(device)
        
        criterion_for_eval = nn.CrossEntropyLoss() # Re-init criterion for safety or pass it
        final_test_accuracy_for_strategy, _ = evaluate_model(best_model_for_strategy, test_loader, criterion_for_eval, device)
        print(f"Final Test Accuracy (best l={best_l_config_for_strategy}): {final_test_accuracy_for_strategy:.4f}%")
    else:
        print("No best configuration found for Strategy 1.")

    print(f"Training time: {strategy_duration_minutes:.2f} minutes")

In [None]:
# --- Run Strategy 1 ---

# These transforms are defined globally above
actual_train_loader, actual_val_loader, actual_test_loader = setup_dataloaders(
    train_transform=train_transform_multi,
    test_transform=test_transform_multi,
    batch_size=64 # TUNE
)


num_epochs_s1 = 15  # TUNE
# Learning rates for FC and backbone
lr_fc_s1 = 1e-3       # TUNE
lr_backbone_s1 = 1e-5 # TUNE
# Learning rate decay factor and patience for ReduceLROnPlateau
factor = 0.1 # TUNE
patience = 1 # TUNE
l2_lambda = 0.0 # L2 regularization TUNE

run_fine_tuning_strategy_1(
    num_epochs=num_epochs_s1,
    lr_fc=lr_fc_s1,
    lr_backbone=lr_backbone_s1,
    device=device,
    train_loader=actual_train_loader,
    val_loader=actual_val_loader,
    test_loader=actual_test_loader,
    num_classes=37,
    model_save_prefix="strategy1_best_model",
    factor=factor, patience=patience, l2_lambda=l2_lambda,
    batchnorm_mode="default" # TUNE ("freeze_params", "freeze_stats", "default")
)

----------------------------------------------------------------------------------------------------------------------------

- **Hyperparameter tuning**

(num_epochs_strat1 = 20, lr_strat1 = 1e-4)

--- Strategy 1 Finished ---
Best l based on validation accuracy: 3 (Accuracy: 90.7609%)
Loading and evaluating best model (l=3) on the Test Set...
Final Test Accuracy (with best l=3 config): 88.6618%
Strategy 1 training time: 12.88 minutes

(num_epochs_strat1 = 20, lr_strat1 = 1e-5)

--- Strategy 1 Finished ---
Best l based on validation accuracy: 2 (Accuracy: 89.5380%)
Loading and evaluating best model (l=2) on the Test Set...
Final Test Accuracy (with best l=2 config): 88.2529%
Strategy 1 training time: 17.58 minutes

(num_epochs_strat1 = 15, lr_strat1 = 1e-5)

--- Strategy 1 Finished ---
Best l based on validation accuracy: 3 (Accuracy: 91.8478%)
Loading and evaluating best model (l=3) on the Test Set...
Final Test Accuracy (with best l=3 config): 88.0076%
Strategy 1 training time: 12.47 minutes

> (num_epochs_strat1 = 15, lr_strat1 = 1e-4)
>
>--- Strategy 1 Finished ---
>Best l based on validation accuracy: 3 (Accuracy: 91.0326%)
>Loading and evaluating best model (l=3) on the Test Set...
>Final Test Accuracy (with best l=3 config): 89.0161%
>Strategy 1 training time: 16.78 minutes

- **Data augmentation**

**Adding RandomResizedCrop** (basically the same)

--- Strategy 1 Finished ---
Best l based on validation accuracy: 1 (Accuracy: 92.5272%)
Loading and evaluating best model (l=1) on the Test Set...
Final Test Accuracy (with best l=1 config): 89.1251%
Strategy 1 training time: 10.66 minutes

> **Adding RandomHorizontalFlip** (almost the same)
>
> --- Strategy 1 Finished ---
> Best l based on validation accuracy: 3 (Accuracy: 92.6630%)
> Loading and evaluating best model (l=3) on the Test Set...
> Final Test Accuracy (with best l=3 config): 89.8337%
> Strategy 1 training time: 10.72 minutes

**Adding RandomRotation** (decreases accuracy in many runs consistently) NOT ADDED

--- Strategy 1 Evaluation ---
Best l based on validation accuracy: 2 (Val Acc: 92.5272%)
Loading and evaluating best model (l=2) on the Test Set...
Final Test Accuracy (best l=2): 87.7351%
Training time: 11.07 minutes

**Conclusion**: RandomRotation decreased the performanced. Not added.

- **L2 regularization**

l2_lambda = 1e-3

--- Strategy 1 Evaluation ---
Best l based on validation accuracy: 1 (Val Acc: 91.7120%)
Loading and evaluating best model (l=1) on the Test Set...
Final Test Accuracy (best l=1): 89.5067%
Training time: 13.22 minutes


l2_lambda = 1e-4

--- Strategy 1 Evaluation ---
Best l based on validation accuracy: 2 (Val Acc: 93.0707%)
Loading and evaluating best model (l=2) on the Test Set...
Final Test Accuracy (best l=2): 89.8065%
Training time: 12.04 minutes

--- Strategy 1 Evaluation ---
Best l based on validation accuracy: 1 (Val Acc: 92.5272%)
Loading and evaluating best model (l=1) on the Test Set...
Final Test Accuracy (best l=1): 89.8337%
Training time: 12.15 minutes

l2_lambda = 1e-5

--- Strategy 1 Evaluation ---
Best l based on validation accuracy: 1 (Val Acc: 91.4402%)
Loading and evaluating best model (l=1) on the Test Set...
Final Test Accuracy (best l=1): 88.9888%
Training time: 12.16 minutes

--- Strategy 1 Evaluation ---
Best l based on validation accuracy: 1 (Val Acc: 92.3913%)
Loading and evaluating best model (l=1) on the Test Set...
Final Test Accuracy (best l=1): 89.1524%
Training time: 13.61 minutes

**Conclusion**: Regularization didn't show consistent improvement. Won't be added.

- **Different learning rates and learning rate schedulers for different layers**

Batch size changed from 32 to 64.

lr_fc_s1 = 1e-4 , lr_backbone_s1 = 1e-5:

--- Strategy 1 Evaluation ---
Best l based on validation accuracy: 3 (Val Acc: 91.7120%)
Loading and evaluating best model (l=3) on the Test Set...
Final Test Accuracy (best l=3): 90.2153%
Training time: 14.09 minutes



> lr_fc_s1 = 1e-3, lr_backbone_s1 = 1e-5:
> 
> --- Strategy 1 Evaluation ---
> Best l based on validation accuracy: 3 (Val Acc: 94.4293%)
> Loading and evaluating best model (l=3) on the Test Set...
> Final Test Accuracy (best l=3): 90.5424%
> Training time: 17.44 minutes
>
> --- Strategy 1 Evaluation ---
> Best l based on validation accuracy: 4 (Val Acc: 94.1576%)
> Loading and evaluating best model (l=4) on the Test Set...
> Final Test Accuracy (best l=4): 90.6241%
> Training time: 12.65 minutes

**Conclusion**: The performance improved using different learning rates and a scheduler.

- **Effect of fine-tuning or not the batch-norm parameters and updating the estimate of the batch mean and standard deviations on the final performance on the new dataset**

Freeze batch-norm params:

--- Strategy 1 Evaluation ---
Best l based on validation accuracy: 4 (Val Acc: 92.9348%)
Loading and evaluating best model (l=4) on the Test Set...
Final Test Accuracy (best l=4): 90.8149%
Training time: 12.58 minutes

--- Strategy 1 Evaluation ---
Best l based on validation accuracy: 2 (Val Acc: 93.0707%)
Loading and evaluating best model (l=2) on the Test Set...
Final Test Accuracy (best l=2): 89.9428%
Training time: 12.78 minutes


Freeze batch stats:

--- Strategy 1 Evaluation ---
Best l based on validation accuracy: 4 (Val Acc: 91.8478%)
Loading and evaluating best model (l=4) on the Test Set...
Final Test Accuracy (best l=4): 90.8422%
Training time: 12.30 minutes

--- Strategy 1 Evaluation ---
Best l based on validation accuracy: 2 (Val Acc: 91.0326%)
Loading and evaluating best model (l=2) on the Test Set...
Final Test Accuracy (best l=2): 90.4061%
Training time: 12.95 minutes

**Conclusion**: Validation accuracy decreased slightly when frozen, so both should be kept.

### Strategy 2

In [None]:
def run_fine_tuning_strategy_2(lr_fc, lr_backbone, device,
                               train_loader, val_loader, test_loader,
                               num_classes=37, model_save_prefix="strategy2_best_model",
                               factor=0.1, patience=2, l2_lambda=0.0, batchnorm_mode="default",
                               unfreeze_schedule=None):
    """
    Implements Strategy 2: Gradual unfreezing of layers during fine-tuning.
    """
    print("\nStarting Fine-Tuning Strategy 2...")
    start_time = time.time()

    val_accuracies_per_stage = {}
    best_overall_val_accuracy = 0.0
    best_stage_config = None

    # Initialize model
    current_model = resnet18(weights='IMAGENET1K_V1')
    # Freeze all parameters initially
    for param in current_model.parameters():
        param.requires_grad = False
    current_model.fc = nn.Linear(current_model.fc.in_features, num_classes)
    current_model = current_model.to(device)

    # Optimizer and Criterion
    criterion = nn.CrossEntropyLoss()

    # Default unfreeze schedule if none is provided
    if unfreeze_schedule is None:
        unfreeze_schedule = [
            {"layers_to_unfreeze": ["layer4"], "epochs": 5},
            {"layers_to_unfreeze": ["layer3"], "epochs": 5},
            {"layers_to_unfreeze": ["layer2"], "epochs": 5},
            {"layers_to_unfreeze": ["layer1"], "epochs": 5},
        ]

    total_epochs = sum(stage["epochs"] for stage in unfreeze_schedule)
    current_epoch = 0

    for stage_idx, stage in enumerate(unfreeze_schedule):
        layers_to_unfreeze = stage["layers_to_unfreeze"]
        stage_epochs = stage["epochs"]

        print(f"\nStage {stage_idx + 1}: Unfreezing layers {layers_to_unfreeze} for {stage_epochs} epochs...")

        # Unfreeze specified layers
        current_backbone_params = []
        for layer_name in layers_to_unfreeze:
            layer = getattr(current_model, layer_name)
            for param in layer.parameters():
                param.requires_grad = True
                current_backbone_params.append(param)

        # Define optimizer with updated parameter groups
        optimizer_grouped_parameters = [{'params': current_model.fc.parameters(), 'lr': lr_fc}]
        if current_backbone_params:
            optimizer_grouped_parameters.append({'params': current_backbone_params, 'lr': lr_backbone})
        current_optimizer = optim.Adam(optimizer_grouped_parameters, weight_decay=l2_lambda)

        # Scheduler
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(current_optimizer, mode='max', factor=factor, patience=patience)

        # Initialize a variable to track the previous learning rates
        previous_lrs = None
        
        # Train for the specified number of epochs in this stage
        for epoch in range(stage_epochs):
            current_epoch += 1
            avg_train_loss = train_one_epoch(current_model, train_loader, current_optimizer, criterion, device, batchnorm_mode=batchnorm_mode)
        
            # Perform validation
            epoch_val_accuracy, epoch_val_loss = evaluate_model(current_model, val_loader, criterion, device)
            scheduler.step(epoch_val_accuracy)
        
            # Get the current learning rates
            current_lrs = [group['lr'] for group in current_optimizer.param_groups]
        
            # Print epoch details
            print(f"    Epoch {current_epoch}/{total_epochs} (Stage {stage_idx + 1}), Train Loss: {avg_train_loss:.4f}, Val Acc: {epoch_val_accuracy:.2f}%, Val Loss: {epoch_val_loss:.4f}")
        
            # Print the learning rates only if they have changed or if this is the first epoch
            if previous_lrs is None or current_lrs != previous_lrs:
                print(f"    Learning rates updated: {current_lrs}")
                previous_lrs = current_lrs  # Update the previous learning rates

        # Validate after the stage
        print(f"\nValidating after Stage {stage_idx + 1}...")
        stage_val_accuracy, stage_val_loss = evaluate_model(current_model, val_loader, criterion, device)
        val_accuracies_per_stage[stage_idx + 1] = stage_val_accuracy
        print(f"Validation Accuracy after Stage {stage_idx + 1}: {stage_val_accuracy:.4f}%, Val Loss: {stage_val_loss:.4f}")

        # Save the best model
        if stage_val_accuracy > best_overall_val_accuracy:
            best_overall_val_accuracy = stage_val_accuracy
            best_stage_config = stage_idx + 1
            torch.save(current_model.state_dict(), f'{model_save_prefix}_stage_{best_stage_config}.pth')
            print(f"    Saved new best model (Stage {best_stage_config})")
    
    end_time = time.time()

    # Evaluate the best model on the test set
    final_test_accuracy_for_strategy = 0.0
    if best_stage_config is not None:
        print(f"\n--- Strategy 2 Evaluation ---")
        print(f"Best stage based on validation accuracy: {best_stage_config} (Val Acc: {val_accuracies_per_stage[best_stage_config]:.4f}%)")
        print(f"Loading and evaluating best model (Stage {best_stage_config}) on the Test Set...")

        # Re-setup the model architecture
        best_model_for_strategy = resnet18(weights=None)
        best_model_for_strategy.fc = nn.Linear(best_model_for_strategy.fc.in_features, num_classes)
        best_model_for_strategy.load_state_dict(torch.load(f'{model_save_prefix}_stage_{best_stage_config}.pth', weights_only=True))
        best_model_for_strategy = best_model_for_strategy.to(device)

        criterion_for_eval = nn.CrossEntropyLoss()
        final_test_accuracy_for_strategy, _ = evaluate_model(best_model_for_strategy, test_loader, criterion_for_eval, device)
        print(f"Final Test Accuracy (best stage={best_stage_config}): {final_test_accuracy_for_strategy:.4f}%")
    else:
        print("No best configuration found for Strategy 2.")

    strategy_duration_minutes = (end_time - start_time) / 60
    print(f"Training time: {strategy_duration_minutes:.2f} minutes")

In [None]:
# --- Run Strategy 2 ---

# These transforms are defined globally above
actual_train_loader, actual_val_loader, actual_test_loader = setup_dataloaders(
    train_transform=train_transform_multi,
    test_transform=test_transform_multi,
    batch_size=64 # TUNE
)


unfreeze_schedule = [
    {"layers_to_unfreeze": ["layer4"], "epochs": 5}, # First stage unfreezes layer4 and trains for 5 epochs
    {"layers_to_unfreeze": ["layer3"], "epochs": 5}, # etc
    {"layers_to_unfreeze": ["layer2"], "epochs": 5},
    {"layers_to_unfreeze": ["layer1"], "epochs": 5},
]


run_fine_tuning_strategy_2(
    lr_fc=1e-3, # TUNE
    lr_backbone=1e-5, # TUNE
    device=device,
    train_loader=actual_train_loader,
    val_loader=actual_val_loader,
    test_loader=actual_test_loader,
    num_classes=37,
    model_save_prefix="strategy2_best_model",
    factor=0.1,
    patience=1,
    l2_lambda=1e-4, # TUNE 0.0
    batchnorm_mode="default",
    unfreeze_schedule=unfreeze_schedule
)

----------------------------------------------------------------------------------------------------------------------------

--- Strategy 2 Evaluation ---
Best stage based on validation accuracy: 2 (Val Acc: 93.8859%)
Loading and evaluating best model (Stage 2) on the Test Set...
Final Test Accuracy (best stage=2): 90.0245%
Training time: 4.15 minutes

--- Strategy 2 Evaluation ---
Best stage based on validation accuracy: 3 (Val Acc: 93.0707%)
Loading and evaluating best model (Stage 3) on the Test Set...
Final Test Accuracy (best stage=3): 90.2153%
Training time: 4.87 minutes

--- Strategy 2 Evaluation ---
Best stage based on validation accuracy: 3 (Val Acc: 93.6141%)
Loading and evaluating best model (Stage 3) on the Test Set...
Final Test Accuracy (best stage=3): 89.6157%
Training time: 4.83 minutes


**Conclusion**: After fine tuning strategy 2, no better performance was achieved compared to strategy 1.



## Imbalanced classes

In [None]:
def get_imbalanced_indices(dataset, cat_breed_fraction=0.2):
    """
    Returns indices for an imbalanced subset: only cat_breed_fraction of each CAT breed,
    but ALL images for dog breeds.
    Assumes dataset returns (img, (category_label, binary_label))
    """
    class_to_indices = defaultdict(list)
    for idx, (_, (category_label, binary_label)) in enumerate(dataset):
        class_to_indices[category_label].append(idx)
    selected_indices = []
    for category_label, indices in class_to_indices.items():
        # Check the binary label of the first sample for this breed, since all samples of the same breed have the same binary label
        _, first_binary_label = dataset[indices[0]][1]
        if first_binary_label == 0:  # Cat breed
            n_select = max(1, int(len(indices) * cat_breed_fraction))
            selected_indices.extend(random.sample(indices, n_select))
        else:  # Dog breed
            selected_indices.extend(indices)
    return selected_indices

print("Loading imbalanced dataset...")

imbalanced_train_loader, val_loader, test_loader = setup_dataloaders(
    train_transform=train_transform_multi,
    test_transform=test_transform_multi,
    batch_size=64,
    imbalanced=True,
    cat_breed_fraction=0.2,
    oversample_minority=True # <--- Activate oversampling of minority classes
)


# Run Strategy 1 with the imbalanced train loader
run_fine_tuning_strategy_1(
    num_epochs=15, # TUNE
    lr_fc=1e-3, # TUNE
    lr_backbone=1e-5, # TUNE
    device=device,
    train_loader=imbalanced_train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
    num_classes=37,
    model_save_prefix="strategy1_imbalanced_model",
    factor=0.1, patience=1, l2_lambda=0.0, # TUNE
    batchnorm_mode="default",
    use_weighted_loss=True  # <--- Activate weighted loss
)

----------------------------------------------------------------------------------------------------------------------------

- **Results after imbalanced classes (only 20% for each cat breed):**

- **Weighted Cross-Entropy Loss:**

- **Over-sampling of the minority classe**

- **Weighted Cross-Entropy Loss AND Over-sampling of the minority classe**