In [None]:
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torchvision
from torchvision.datasets import ImageFolder
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import os
import matplotlib.pyplot as plt

In [3]:
# Preprocessing constants shared by both pipelines below.
_IMAGE_SIZE = (64, 64)
_CHANNEL_MEAN = (0.5, 0.5, 0.5)
_CHANNEL_STD = (0.5, 0.5, 0.5)

# Random augmentations; only the training pipeline uses them.
_train_augmentations = [
    transforms.RandomGrayscale(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=90),
    transforms.RandomVerticalFlip(),
]

# Training pipeline: resize -> random augmentations -> tensor -> normalize.
train_transform = transforms.Compose([
    transforms.Resize(_IMAGE_SIZE),
    *_train_augmentations,
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

# Evaluation pipeline: same resize / tensor / normalize steps, no augmentation.
normalize_transform = transforms.Compose([
    transforms.Resize(_IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

In [None]:
# Make sure the output folders used for pickles/CSVs and figures exist.
for output_dir in ("results", "plots"):
    os.makedirs(output_dir, exist_ok=True)

# Dataset locations, one sub-folder per split.
train_path = './catdog_data/train'
test_path = './catdog_data/test'
validation_path = './catdog_data/validation'

# Only the training split gets the augmenting transform; the test and
# validation splits use the plain resize + normalize pipeline.
train_data = ImageFolder(train_path, transform=train_transform)
test_data = ImageFolder(test_path, transform=normalize_transform)
validation_data = ImageFolder(validation_path, transform=normalize_transform)

In [None]:
import pickle

class CNN(nn.Module):
    """Three-block convolutional classifier that outputs two class logits.

    Each block is Conv2d(3x3, padding 1) -> BatchNorm2d -> activation ->
    MaxPool2d(2), with channel widths 3 -> 128 -> 64 -> 32. The three pooling
    steps each halve the spatial size and ``fc1`` expects ``32 * 8 * 8``
    features, so the network is built for 64x64 RGB inputs.

    Args:
        activation_function: an ``nn.Module`` instance (e.g. ``nn.ReLU()``).
            The same instance is reused after every convolution block and
            after the first fully connected layer.
    """

    def __init__(self, activation_function):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            activation_function,
            nn.MaxPool2d(kernel_size=2)
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            activation_function,
            nn.MaxPool2d(kernel_size=2)
        )

        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            activation_function,
            nn.MaxPool2d(kernel_size=2)
        )

        # Classifier head: flattened 32x8x8 feature map -> 128 -> 2 logits.
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.dropout = nn.Dropout(0.25)
        self.fc2 = nn.Linear(128, 2)
        self.act = activation_function

    def forward(self, x):
        """Return raw class logits of shape ``(batch, 2)`` for a batch ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        # Flatten everything except the batch dimension for the linear layers.
        out = out.flatten(1)
        out = self.act(self.fc1(out))
        # Dropout was declared in __init__ but previously never applied.
        # It is only active in train() mode and is the identity in eval().
        out = self.dropout(out)
        out = self.fc2(out)
        return out

def train_epoch(model, dataloader, criterion, optimizer, device, regularization_type=None, lambda_reg=0.001):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: network to optimize; it is switched to training mode.
        dataloader: iterable yielding ``(data, target)`` batches.
        criterion: loss callable ``criterion(outputs, target)``. The per-batch
            value is weighted by the batch size, which assumes a mean-reduced
            loss.
        optimizer: optimizer already bound to ``model``'s parameters.
        device: device the batches are moved to.
        regularization_type: ``None`` (no penalty), ``'L1'`` or ``'L2'``. The
            penalty is summed over every parameter of the model.
        lambda_reg: coefficient multiplying the penalty term.

    Returns:
        ``(epoch_loss, epoch_acc)``: the sample-weighted mean of the optimized
        loss (including the penalty, if any) and the accuracy in percent,
        both computed over the samples actually seen.

    Raises:
        ValueError: if ``regularization_type`` is not one of the supported
            values, or if ``dataloader`` yields no samples.
    """
    if regularization_type not in (None, 'L1', 'L2'):
        # Previously an unknown name silently disabled regularization.
        raise ValueError(
            f"Unknown regularization_type {regularization_type!r}; expected None, 'L1' or 'L2'"
        )

    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for data, target in dataloader:
        data, target = data.to(device), target.to(device)
        n_samples = target.size(0)

        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target)

        # manual regularization
        if regularization_type == 'L1':
            l1_norm = sum(p.abs().sum() for p in model.parameters())
            loss = loss + lambda_reg * l1_norm
        elif regularization_type == 'L2':
            l2_norm = sum(p.pow(2).sum() for p in model.parameters())
            loss = loss + lambda_reg * l2_norm

        loss.backward()
        optimizer.step()

        running_loss += loss.item() * n_samples
        total += n_samples
        correct += (outputs.argmax(dim=1) == target).sum().item()

    if total == 0:
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    # Normalize by the samples actually processed rather than the dataset
    # size, so the loss stays correct with drop_last=True or a sampler.
    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total
    return epoch_loss, epoch_acc

def validate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating any weights.

    Args:
        model: network to evaluate; it is switched to eval mode and left there.
        dataloader: iterable yielding ``(data, target)`` batches.
        criterion: loss callable ``criterion(outputs, target)``. The per-batch
            value is weighted by the batch size, which assumes a mean-reduced
            loss.
        device: device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy)``: the sample-weighted mean loss and the
        accuracy in percent, both computed over the samples actually seen.

    Raises:
        ValueError: if ``dataloader`` yields no samples.
    """
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in dataloader:
            data, target = data.to(device), target.to(device)
            n_samples = target.size(0)

            outputs = model(data)
            val_loss += criterion(outputs, target).item() * n_samples
            total += n_samples
            correct += (outputs.argmax(dim=1) == target).sum().item()

    if total == 0:
        raise ValueError("dataloader yielded no samples; cannot compute validation metrics")

    # Use the number of evaluated samples as the denominator so the loss is
    # correct even when the loader covers only part of its dataset
    # (drop_last=True or a subset sampler).
    avg_loss = val_loss / total
    accuracy = 100 * correct / total
    return avg_loss, accuracy

# Seed PyTorch's RNG so weight initialization, data shuffling and the random
# augmentations are repeatable when the notebook is re-run from a fresh kernel.
SEED = 42
torch.manual_seed(SEED)

# --- Hyperparameter grid ---
batch_sizes = [64, 128, 256]
learning_rates = [0.1, 0.01, 0.001]

# Display name -> activation module instance handed to CNN(...).
activation_functions = {
    'ReLU': nn.ReLU(),
    'Sigmoid': nn.Sigmoid(),
    'Tanh': nn.Tanh(),
    'LeakyReLU': nn.LeakyReLU(),
    'LogSigmoid': nn.LogSigmoid(),
    'ELU': nn.ELU(),
    'SiLU': nn.SiLU(),
    'Softplus': nn.Softplus()
}

# Display name -> optimizer class; instantiated per run with the model's
# parameters and the learning rate under test.
optimizers = {
    'SGD': optim.SGD,
    'Adam': optim.Adam
}

# Manual penalty types understood by train_epoch, and the coefficient used
# for each of them.
regularizations = [None, 'L1', 'L2']
reg_lambda = {
    None: 0.0,
    'L1': 0.001,
    'L2': 0.01
}

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 15

# all_results: one dict per run including the per-epoch histories.
# summary_records: one flat row per run, later turned into the summary CSV.
all_results = []
summary_records = []

print(f"Using device: {device}")
print("Beginning grid search...")


# Grid search. For each activation function, train one CNN for every
# (batch size, regularization, learning rate, optimizer) combination and
# record the per-epoch histories. Then pickle that activation's runs and save
# a grid of validation-accuracy curves (rows: batch size, columns:
# regularization).
for act_name, activation_function in activation_functions.items():
    print(f"\n=== Activation: {act_name} ===")

    activation_results = []

    for batch_size in batch_sizes:
        # The loaders depend only on the batch size, so they are built once
        # here and shared by every run that uses this batch size.
        train_dataloader = DataLoader(
            dataset=train_data,
            batch_size=batch_size,
            shuffle=True,
            num_workers=2
        )

        validation_dataloader = DataLoader(
            dataset=validation_data,
            batch_size=batch_size,
            shuffle=False,
            num_workers=2
        )

        # NOTE(review): the test loader is not consumed anywhere in this
        # loop; it is kept because later cells may rely on it.
        test_dataloader = DataLoader(
            dataset=test_data,
            batch_size=batch_size,
            shuffle=False,
            num_workers=2
        )

        for reg in regularizations:
            print(f"\n-- Batch size: {batch_size} | Regularization: {reg} --")

            for learning_rate in learning_rates:
                # The loop variable is the optimizer *class*; it gets its own
                # name so it is not shadowed by the instance created below.
                for opt_name, optimizer_cls in optimizers.items():
                    print(f"Training: act={act_name}, bs={batch_size}, reg={reg}, lr={learning_rate}, opt={opt_name}")

                    # Fresh model, loss and optimizer for every configuration.
                    model = CNN(activation_function).to(device)
                    criterion = nn.CrossEntropyLoss()
                    optimizer = optimizer_cls(model.parameters(), lr=learning_rate)

                    train_loss_history = []
                    val_loss_history = []
                    val_accuracy_history = []

                    for epoch in range(num_epochs):
                        train_loss, train_acc = train_epoch(
                            model,
                            train_dataloader,
                            criterion,
                            optimizer,
                            device,
                            regularization_type=reg,
                            lambda_reg=reg_lambda[reg]
                        )
                        val_loss, val_accuracy = validate(model, validation_dataloader, criterion, device)

                        train_loss_history.append(train_loss)
                        val_loss_history.append(val_loss)
                        val_accuracy_history.append(val_accuracy)

                        print(f"Epoch [{epoch + 1}/{num_epochs}] | Train Loss: {train_loss:.4f} | Validation Loss: {val_loss:.4f} | Validation Accuracy: {val_accuracy:.2f} %")

                    # Best validation accuracy and the (1-based) epoch at
                    # which it was first reached.
                    best_val_acc = max(val_accuracy_history)
                    best_epoch = val_accuracy_history.index(best_val_acc) + 1

                    # Keys identifying this run; shared by both records.
                    run_config = {
                        "activation_function": act_name,
                        "batch_size": batch_size,
                        "regularization": reg,
                        "learning_rate": learning_rate,
                        "optimizer": opt_name,
                    }

                    exp_result = {
                        **run_config,
                        "train_loss_history": train_loss_history,
                        "val_loss_history": val_loss_history,
                        "val_accuracy_history": val_accuracy_history
                    }

                    all_results.append(exp_result)
                    activation_results.append(exp_result)

                    summary_records.append({
                        **run_config,
                        "best_val_accuracy": best_val_acc,
                        "best_epoch": best_epoch
                    })

    # Persist this activation's runs before plotting, so the raw histories
    # are already on disk if plotting fails.
    per_act_path = os.path.join("results", f"cnn_experiments_{act_name}.pkl")
    with open(per_act_path, "wb") as f:
        pickle.dump(activation_results, f)
    print(f"Saved results for {act_name} -> {per_act_path}")

    # squeeze=False always yields a 2-D array of axes, so axes[i, j] works
    # even when the grid has a single row and/or a single column.
    fig, axes = plt.subplots(
        len(batch_sizes),
        len(regularizations),
        figsize=(16, 12),
        sharey=True,
        squeeze=False
    )
    for i, bs in enumerate(batch_sizes):
        for j, reg in enumerate(regularizations):
            ax = axes[i, j]
            # select experiments matching this activation, bs, reg
            matching = [r for r in activation_results if (r['batch_size'] == bs and r['regularization'] == reg)]
            if not matching:
                ax.set_title(f"bs={bs}, reg={reg} (no runs)")
                continue

            # One curve per (learning rate, optimizer) pair.
            for run in matching:
                label = f"LR={run['learning_rate']}, OPT={run['optimizer']}"
                ax.plot(run['val_accuracy_history'], label=label, marker='o')

            ax.set_title(f"bs={bs}, reg={reg}")
            ax.set_xlabel("Epoch")
            if j == 0:
                ax.set_ylabel("Validation Accuracy (%)")
            ax.legend(fontsize=7, loc='lower right')
            ax.grid(True)

    fig.suptitle(f"Validation Accuracy per Epoch - Activation: {act_name}", fontsize=16)
    # Leave room at the top of the figure for the suptitle.
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])

    plot_path = os.path.join("plots", f"cnn_plots_{act_name}.png")
    fig.savefig(plot_path, dpi=200)
    print(f"Saved plot for {act_name} -> {plot_path}")
    plt.show()
    # Close the figure so one figure per activation does not pile up in memory.
    plt.close(fig)

# --- Persist the outcome of the whole grid search ---
results_dir = "results"
combined_path = os.path.join(results_dir, "cnn_experiments_all.pkl")
summary_csv_path = os.path.join(results_dir, "cnn_experiments_summary.csv")

# Full per-run histories for every configuration, in one pickle.
with open(combined_path, "wb") as combined_file:
    pickle.dump(all_results, combined_file)

# One row per run (best validation accuracy and its epoch) as a CSV table.
summary_df = pd.DataFrame(summary_records)
summary_df.to_csv(summary_csv_path, index=False)
print(f"\nSummary CSV saved -> {summary_csv_path}")

print(f"\nAll experiments complete. Combined results saved -> {combined_path}")

Testing parameters for activation function Softplus

=== Batch size: 64 ===

 --- Learning rate: 0.1 ---

Optimizing Softplus with Stochastic Gradient Descent...
Epoch [1/15], Validation Loss: 1.9827, Validation Accuracy: 50.00 %
Epoch [2/15], Validation Loss: 0.8414, Validation Accuracy: 50.00 %
Epoch [3/15], Validation Loss: 0.7081, Validation Accuracy: 50.00 %
Epoch [4/15], Validation Loss: 0.6976, Validation Accuracy: 50.00 %
Epoch [5/15], Validation Loss: 0.6941, Validation Accuracy: 50.00 %
Epoch [6/15], Validation Loss: 0.8530, Validation Accuracy: 50.00 %
Epoch [7/15], Validation Loss: 0.7458, Validation Accuracy: 50.00 %
Epoch [8/15], Validation Loss: 0.6991, Validation Accuracy: 50.00 %
Epoch [9/15], Validation Loss: 0.6966, Validation Accuracy: 50.00 %
Epoch [10/15], Validation Loss: 0.6950, Validation Accuracy: 50.00 %
Epoch [11/15], Validation Loss: 0.7008, Validation Accuracy: 50.00 %
Epoch [12/15], Validation Loss: 0.6942, Validation Accuracy: 50.00 %
Epoch [13/15], Vali