<a href="https://colab.research.google.com/github/esraalmaeeni/DL-lab/blob/main/exp3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import copy
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader

# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
class CustomCNN(nn.Module):
    """Three-block convolutional classifier with a selectable activation.

    Each convolutional block is Conv2d(3x3, padding=1) -> BatchNorm2d ->
    activation -> MaxPool2d(2) -> Dropout(0.25), with 32, 64 and 128 output
    channels. The flattened feature map feeds a 256-unit hidden layer
    (activation + Dropout(0.5)) and a final linear layer that returns raw
    logits (no softmax is applied).
    """

    def __init__(self, num_classes=10, activation='relu', input_size=32):
        """
        num_classes: Number of output classes.
        activation: 'relu', 'tanh', or 'leaky_relu' (matched case-insensitively).
        input_size: Side length of the square input image (e.g. 32 for CIFAR‑10,
            64 for Cats vs. Dogs). Must be at least 8 so that a feature map
            survives the three 2x poolings.

        Raises:
            ValueError: if `activation` is not supported or `input_size` < 8.
        """
        super(CustomCNN, self).__init__()

        # Select the activation function
        activation_key = activation.lower()
        if activation_key == 'relu':
            self.act = nn.ReLU()
        elif activation_key == 'tanh':
            self.act = nn.Tanh()
        elif activation_key == 'leaky_relu':
            self.act = nn.LeakyReLU(negative_slope=0.1)
        else:
            raise ValueError("Unsupported activation function: choose 'relu', 'tanh', or 'leaky_relu'")

        # Three poolings shrink each side by 8; anything smaller than 8 would
        # give a zero-sized feature map and a zero-width fully connected layer.
        if input_size < 8:
            raise ValueError(f"input_size must be at least 8, got {input_size}")

        # The same activation module is reused in every block below; it holds
        # no parameters, so the state_dict keys are unaffected by the sharing.

        # First Convolutional Block
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            self.act,
            nn.MaxPool2d(2),
            nn.Dropout(0.25)
        )
        # Second Convolutional Block
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            self.act,
            nn.MaxPool2d(2),
            nn.Dropout(0.25)
        )
        # Third Convolutional Block
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            self.act,
            nn.MaxPool2d(2),
            nn.Dropout(0.25)
        )
        # Determine final feature map size (after 3 poolings, each dividing by 2)
        feature_map_size = input_size // 8
        self.feature_size = 128 * feature_map_size * feature_map_size

        # Fully Connected Layers
        self.fc1 = nn.Sequential(
            nn.Linear(self.feature_size, 256),
            self.act,
            nn.Dropout(0.5)
        )
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a batch of images (N, 3, input_size, input_size) to logits (N, num_classes)."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # flatten() copies when needed, so non-contiguous activations
        # (e.g. channels_last inputs) work where .view() would raise.
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x


In [None]:
def initialize_weights(model, init_type='xavier'):
    """
    Re-initialise, in place, every Conv2d and Linear layer found in `model`.

    model: Module whose sub-modules are scanned via `model.modules()`.
    init_type: 'xavier', 'kaiming', or 'random' (matched case-insensitively).
        'xavier'  -> Xavier/Glorot uniform weights
        'kaiming' -> Kaiming/He uniform weights computed for a ReLU nonlinearity
        'random'  -> normal weights with mean 0 and std 0.02
    Biases of the affected layers, when present, are set to zero. Other layer
    types (e.g. BatchNorm) are left untouched.

    Raises:
        ValueError: if `init_type` is not one of the supported names. The check
            runs before any layer is modified, so an invalid name is reported
            even for a model that contains no Conv2d/Linear layers.
    """
    scheme = init_type.lower()
    if scheme not in ('xavier', 'kaiming', 'random'):
        raise ValueError("Unsupported initialization type")

    for m in model.modules():
        if not isinstance(m, (nn.Conv2d, nn.Linear)):
            continue
        if scheme == 'xavier':
            nn.init.xavier_uniform_(m.weight)
        elif scheme == 'kaiming':
            # Note: If your activation is Tanh, Kaiming may be less optimal.
            nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
        else:  # 'random'
            nn.init.normal_(m.weight, mean=0, std=0.02)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)


In [None]:
def get_optimizer(model, optimizer_name='adam', lr=1e-3):
    """
    Build an optimizer over all parameters of `model`.

    optimizer_name: 'sgd' (momentum 0.9), 'adam', or 'rmsprop'; matched
        case-insensitively.
    lr: Learning rate handed to the optimizer.

    Raises ValueError for any other optimizer name.
    """
    # Each entry constructs its optimizer lazily, so nothing is built for an
    # unsupported name.
    builders = {
        'sgd': lambda params: optim.SGD(params, lr=lr, momentum=0.9),
        'adam': lambda params: optim.Adam(params, lr=lr),
        'rmsprop': lambda params: optim.RMSprop(params, lr=lr),
    }
    build = builders.get(optimizer_name.lower())
    if build is None:
        raise ValueError("Unsupported optimizer")
    return build(model.parameters())


In [None]:
def train_epoch(model, dataloader, optimizer, criterion, device):
    """
    Train `model` for one full pass over `dataloader`.

    model: Network to optimise; switched to training mode.
    dataloader: Iterable yielding (inputs, labels) batches.
    optimizer: Optimizer stepped once per batch.
    criterion: Loss callable applied to (outputs, labels). Its value is
        weighted by the batch size, i.e. it is treated as a per-batch mean.
    device: Device the batches are moved to before the forward pass.

    Returns:
        (loss, accuracy): sample-weighted mean loss and the fraction of
        samples whose arg-max prediction equals the label.

    Raises:
        ValueError: if `dataloader` yields no samples (the averages would
            otherwise fail with a bare ZeroDivisionError).
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        batch_size = inputs.size(0)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += batch_size

    if total == 0:
        raise ValueError("train_epoch received a dataloader that yielded no samples")
    return running_loss / total, correct / total

def evaluate(model, dataloader, criterion, device):
    """
    Measure loss and accuracy of `model` over `dataloader` without training.

    model: Network to evaluate; switched to eval mode and left in it.
    dataloader: Iterable yielding (inputs, labels) batches.
    criterion: Loss callable applied to (outputs, labels). Its value is
        weighted by the batch size, i.e. it is treated as a per-batch mean.
    device: Device the batches are moved to before the forward pass.

    Returns:
        (loss, accuracy): sample-weighted mean loss and the fraction of
        samples whose arg-max prediction equals the label.

    Raises:
        ValueError: if `dataloader` yields no samples (the averages would
            otherwise fail with a bare ZeroDivisionError).
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            batch_size = inputs.size(0)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += batch_size

    if total == 0:
        raise ValueError("evaluate received a dataloader that yielded no samples")
    return running_loss / total, correct / total


In [None]:
# CIFAR-10 preprocessing. Both splits are converted to tensors and normalised
# per channel with mean 0.5 / std 0.5; only the training split is augmented
# with a random horizontal flip.
transform_cifar_train = transforms.Compose(
    [transforms.RandomHorizontalFlip()]
    + [transforms.ToTensor(), transforms.Normalize((0.5,) * 3, (0.5,) * 3)]
)
transform_cifar_test = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,) * 3, (0.5,) * 3)]
)

# Datasets are stored under ./data and downloaded there when missing.
cifar_trainset = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_cifar_train,
)
cifar_testset = datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_cifar_test,
)

# Only the training loader shuffles; the test loader keeps dataset order.
trainloader_cifar = DataLoader(
    cifar_trainset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)
testloader_cifar = DataLoader(
    cifar_testset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)


Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Transformations for Cats vs. Dogs (resized to 64x64).
# The training transform augments with a random horizontal flip; the
# validation transform omits it so that validation metrics are deterministic.
transform_catsdogs = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
transform_catsdogs_val = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Folder structure: ./data/cats_vs_dogs/train and ./data/cats_vs_dogs/val
catsdogs_root = './data/cats_vs_dogs'
catsdogs_train_dir = os.path.join(catsdogs_root, 'train')
catsdogs_val_dir = os.path.join(catsdogs_root, 'val')

# Fail early with an actionable message: unlike CIFAR-10, this dataset is not
# downloaded by this cell and has to be placed on disk beforehand.
for split_dir in (catsdogs_train_dir, catsdogs_val_dir):
    if not os.path.isdir(split_dir):
        raise FileNotFoundError(
            f"Cats vs. Dogs directory not found: '{split_dir}'. "
            f"Place the dataset under '{catsdogs_root}' with 'train' and 'val' "
            "sub-folders (one folder per class) before running this cell."
        )

catsdogs_trainset = datasets.ImageFolder(catsdogs_train_dir, transform=transform_catsdogs)
catsdogs_valset   = datasets.ImageFolder(catsdogs_val_dir, transform=transform_catsdogs_val)

trainloader_catsdogs = DataLoader(catsdogs_trainset, batch_size=32, shuffle=True, num_workers=2)
valloader_catsdogs   = DataLoader(catsdogs_valset, batch_size=32, shuffle=False, num_workers=2)


FileNotFoundError: [Errno 2] No such file or directory: './data/cats_vs_dogs/train'

In [None]:
# Hyperparameter configurations: every combination of the three lists below
# is trained from scratch on CIFAR-10 and the best checkpoint is kept.
activations = ['relu', 'tanh', 'leaky_relu']
init_methods = ['xavier', 'kaiming', 'random']
optimizers_list = ['sgd', 'adam', 'rmsprop']

num_epochs = 10       # Adjust as needed
learning_rate = 1e-3
num_classes = 10      # For CIFAR-10
input_size = 32       # CIFAR-10 images are 32x32
seed = 42             # Re-applied before every configuration (see below)

best_overall_acc = 0.0
best_config = {}
best_model_state = None

# NOTE(review): testloader_cifar is the CIFAR-10 test split, so the
# "validation" accuracy used for model selection here is measured on test
# data. Carve a validation split out of the training set if an unbiased test
# estimate is needed.
for act in activations:
    for init_method in init_methods:
        for opt_name in optimizers_list:
            print(f"\n=== Training with Activation: {act}, Initialization: {init_method}, Optimizer: {opt_name} ===")

            # Reset the RNG so every configuration starts from the same random
            # state and the run is reproducible; differences in accuracy then
            # come from the configuration rather than from the random draw.
            torch.manual_seed(seed)

            # Initialize the model
            model = CustomCNN(num_classes=num_classes, activation=act, input_size=input_size).to(device)
            initialize_weights(model, init_type=init_method)

            # Setup optimizer and loss criterion
            optimizer = get_optimizer(model, optimizer_name=opt_name, lr=learning_rate)
            criterion = nn.CrossEntropyLoss()

            best_val_acc = 0.0
            best_model_wts = copy.deepcopy(model.state_dict())

            # Training loop
            for epoch in range(num_epochs):
                start_time = time.time()
                train_loss, train_acc = train_epoch(model, trainloader_cifar, optimizer, criterion, device)
                val_loss, val_acc = evaluate(model, testloader_cifar, criterion, device)
                elapsed = time.time() - start_time

                print(f"Epoch {epoch+1}/{num_epochs} - "
                      f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
                      f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f} | Time: {elapsed:.2f}s")

                # Save model weights if validation accuracy improves.
                # deepcopy is required: state_dict() returns references to the
                # live tensors, which keep changing as training continues.
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

            print(f"Best validation accuracy for current configuration: {best_val_acc:.4f}")

            # Check if this configuration is the best overall
            if best_val_acc > best_overall_acc:
                best_overall_acc = best_val_acc
                best_config = {'activation': act, 'init': init_method, 'optimizer': opt_name}
                best_model_state = best_model_wts

print("\n=== Best Overall Configuration on CIFAR-10 ===")
print(best_config)
print(f"Validation Accuracy: {best_overall_acc:.4f}")

# Save the best model weights. best_model_state stays None when no
# configuration ever scored above 0.0; skip the save rather than writing a
# checkpoint file that contains None.
if best_model_state is not None:
    torch.save(best_model_state, 'best_cifar_model.pth')
else:
    print("No configuration achieved a validation accuracy above 0; no checkpoint saved.")



=== Training with Activation: relu, Initialization: xavier, Optimizer: sgd ===
Epoch 1/10 - Train Loss: 2.1013, Train Acc: 0.2440 | Val Loss: 1.7742, Val Acc: 0.3754 | Time: 158.67s
Epoch 2/10 - Train Loss: 1.7666, Train Acc: 0.3442 | Val Loss: 1.5714, Val Acc: 0.4361 | Time: 150.59s
Epoch 3/10 - Train Loss: 1.6409, Train Acc: 0.3941 | Val Loss: 1.4825, Val Acc: 0.4658 | Time: 149.26s
Epoch 4/10 - Train Loss: 1.5740, Train Acc: 0.4155 | Val Loss: 1.4071, Val Acc: 0.4985 | Time: 149.05s
Epoch 5/10 - Train Loss: 1.5138, Train Acc: 0.4443 | Val Loss: 1.3744, Val Acc: 0.5053 | Time: 153.63s
Epoch 6/10 - Train Loss: 1.4716, Train Acc: 0.4608 | Val Loss: 1.3303, Val Acc: 0.5236 | Time: 150.82s
Epoch 7/10 - Train Loss: 1.4374, Train Acc: 0.4725 | Val Loss: 1.2668, Val Acc: 0.5462 | Time: 149.23s
Epoch 8/10 - Train Loss: 1.3964, Train Acc: 0.4911 | Val Loss: 1.2443, Val Acc: 0.5512 | Time: 151.26s
Epoch 9/10 - Train Loss: 1.3720, Train Acc: 0.5039 | Val Loss: 1.2422, Val Acc: 0.5507 | Time: 1

In [None]:
# Load pretrained ResNet-18.
# `pretrained=True` is deprecated in newer torchvision releases in favour of
# the `weights=` argument; IMAGENET1K_V1 is the checkpoint that
# `pretrained=True` loaded. Fall back to the old keyword on older versions.
if hasattr(models, "ResNet18_Weights"):
    resnet18 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    resnet18 = models.resnet18(pretrained=True)
in_features = resnet18.fc.in_features
resnet18.fc = nn.Linear(in_features, num_classes)  # Adjust output layer
resnet18 = resnet18.to(device)

# NOTE(review): trainloader_cifar feeds 32x32 images normalised with
# mean/std 0.5 rather than the ImageNet statistics the pretrained weights
# were trained with — confirm this is intended.

# Option: Fine-tune all layers (or freeze some layers as needed)
for param in resnet18.parameters():
    param.requires_grad = True

# Setup optimizer and loss
optimizer_resnet = get_optimizer(resnet18, optimizer_name='adam', lr=learning_rate)
criterion_resnet = nn.CrossEntropyLoss()

best_resnet_acc = 0.0
best_resnet_wts = copy.deepcopy(resnet18.state_dict())

print("\n=== Fine-tuning Pretrained ResNet-18 ===")
for epoch in range(num_epochs):
    start_time = time.time()
    train_loss, train_acc = train_epoch(resnet18, trainloader_cifar, optimizer_resnet, criterion_resnet, device)
    val_loss, val_acc = evaluate(resnet18, testloader_cifar, criterion_resnet, device)
    elapsed = time.time() - start_time

    print(f"ResNet Epoch {epoch+1}/{num_epochs} - "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f} | Time: {elapsed:.2f}s")

    # Keep a detached copy of the best weights seen so far; state_dict()
    # returns references to tensors that keep changing during training.
    if val_acc > best_resnet_acc:
        best_resnet_acc = val_acc
        best_resnet_wts = copy.deepcopy(resnet18.state_dict())

print(f"Best ResNet-18 Validation Accuracy: {best_resnet_acc:.4f}")
torch.save(best_resnet_wts, 'best_resnet_model.pth')
