## RELU

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import optuna
import torch.nn.functional as F

# Define transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the Optuna objective function
def objective(trial):
    # Hyperparameters to tune
    num_filters1 = trial.suggest_categorical("num_filters1", [32, 64, 128])
    num_filters2 = trial.suggest_categorical("num_filters2", [64, 128, 256])
    kernel_size = trial.suggest_categorical("kernel_size", [3, 5])
    pooling_type = trial.suggest_categorical("pooling", ["max", "avg"])
    fc_size = trial.suggest_categorical("fc_size", [128, 256, 512])
    dropout_rate = trial.suggest_float("dropout_rate", 0.2, 0.5)
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    
    class CNN(nn.Module):
        def __init__(self):
            super(CNN, self).__init__()
            self.conv1 = nn.Conv2d(3, num_filters1, kernel_size, padding=1)
            self.conv2 = nn.Conv2d(num_filters1, num_filters2, kernel_size, padding=1)
            self.pool = nn.MaxPool2d(2, 2) if pooling_type == "max" else nn.AvgPool2d(2, 2)
            #self.fc1 = nn.Linear(num_filters2 * 8 * 8, fc_size)
            self._to_linear = None  # Placeholder for dynamically determined size
            self.fc2 = nn.Linear(fc_size, 10)
            self.dropout = nn.Dropout(dropout_rate)


        def forward(self, x):
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            if self._to_linear is None:  # Calculate dynamically
                self._to_linear = x.view(x.size(0), -1).shape[1]
                self.fc1 = nn.Linear(self._to_linear, fc_size).to(device)  # Initialize after knowing size
            x = x.view(x.size(0), -1)
            x = self.dropout(x)
            x = self.fc2(x)
            return x

        # def forward(self, x):
        #     x = self.pool(F.relu(self.conv1(x)))
        #     x = self.pool(F.relu(self.conv2(x)))
        #     x = x.view(x.size(0), -1)
        #     #print("Flattened size:", x.shape)
        #     x = F.relu(self.fc1(x))
        #     x = self.dropout(x)
        #     x = self.fc2(x)
        #     return x

    # Initialize model, loss function, and optimizer
    model = CNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    num_epochs = 3  # Small value for quick tuning
    for epoch in range(num_epochs):
        for images, labels in trainloader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Evaluate the model
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = correct / total
    return accuracy  # Optuna will maximize this


Files already downloaded and verified
Files already downloaded and verified


In [2]:
study = optuna.create_study(direction="maximize")  # We want to maximize accuracy
study.optimize(objective, n_trials=10)  # Try 10 different sets of hyperparameters

# Print the best hyperparameters
print("Best hyperparameters:", study.best_params)

[I 2025-03-18 00:37:05,670] A new study created in memory with name: no-name-44c3efc1-81c1-4a0d-920c-a7808b132a54
[I 2025-03-18 00:37:44,284] Trial 0 finished with value: 0.4422 and parameters: {'num_filters1': 32, 'num_filters2': 256, 'kernel_size': 3, 'pooling': 'avg', 'fc_size': 512, 'dropout_rate': 0.2544680684127717, 'learning_rate': 0.00022599420266733583}. Best is trial 0 with value: 0.4422.
[I 2025-03-18 00:38:22,036] Trial 1 finished with value: 0.4619 and parameters: {'num_filters1': 32, 'num_filters2': 64, 'kernel_size': 3, 'pooling': 'avg', 'fc_size': 512, 'dropout_rate': 0.22605048829444196, 'learning_rate': 0.0003669976304852994}. Best is trial 1 with value: 0.4619.
[I 2025-03-18 00:39:00,236] Trial 2 finished with value: 0.5661 and parameters: {'num_filters1': 128, 'num_filters2': 64, 'kernel_size': 5, 'pooling': 'avg', 'fc_size': 512, 'dropout_rate': 0.46175040925588634, 'learning_rate': 0.0010871993567913908}. Best is trial 2 with value: 0.5661.
[I 2025-03-18 00:39:38,

Best hyperparameters: {'num_filters1': 64, 'num_filters2': 256, 'kernel_size': 3, 'pooling': 'max', 'fc_size': 256, 'dropout_rate': 0.4535517889809642, 'learning_rate': 0.0027866759720100087}


## Leaky Relu

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import optuna
import torch.nn.functional as F

# Define transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the Optuna objective function
def objective(trial):
    # Hyperparameters to tune
    num_filters1 = trial.suggest_categorical("num_filters1", [32, 64, 128])
    num_filters2 = trial.suggest_categorical("num_filters2", [64, 128, 256])
    kernel_size = trial.suggest_categorical("kernel_size", [3, 5])
    pooling_type = trial.suggest_categorical("pooling", ["max", "avg"])
    fc_size = trial.suggest_categorical("fc_size", [128, 256, 512])
    dropout_rate = trial.suggest_float("dropout_rate", 0.2, 0.5)
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    negative_slope = trial.suggest_float("negative_slope", 0.01, 0.3)  # Tuning LeakyReLU slope
    
    class CNN(nn.Module):
        def __init__(self):
            super(CNN, self).__init__()
            self.conv1 = nn.Conv2d(3, num_filters1, kernel_size, padding=1)
            self.conv2 = nn.Conv2d(num_filters1, num_filters2, kernel_size, padding=1)
            self.pool = nn.MaxPool2d(2, 2) if pooling_type == "max" else nn.AvgPool2d(2, 2)
            self.dropout = nn.Dropout(dropout_rate)

            # Dummy forward pass to determine `self._to_linear`
            dummy_input = torch.randn(1, 3, 32, 32)  # CIFAR-10 image size
            with torch.no_grad():
                dummy_out = self._feature_extractor(dummy_input)
                self._to_linear = dummy_out.view(1, -1).shape[1]

            self.fc1 = nn.Linear(self._to_linear, fc_size)
            self.fc2 = nn.Linear(fc_size, 10)

        def _feature_extractor(self, x):
            x = self.pool(F.leaky_relu(self.conv1(x), negative_slope))
            x = self.pool(F.leaky_relu(self.conv2(x), negative_slope))
            return x

        def forward(self, x):
            x = self._feature_extractor(x)
            x = x.view(x.size(0), -1)  # Flatten
            x = self.dropout(F.leaky_relu(self.fc1(x), negative_slope))
            x = self.fc2(x)
            return x

    # Initialize model, loss function, and optimizer
    model = CNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    num_epochs = 5  # Small value for quick tuning
    for epoch in range(num_epochs):
        model.train()
        for images, labels in trainloader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Evaluate the model
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = correct / total
    return accuracy  # Optuna will maximize this

# Run hyperparameter tuning
study = optuna.create_study(direction="maximize")  
study.optimize(objective, n_trials=10)  

# Print the best hyperparameters
print("Best hyperparameters:", study.best_params)


Files already downloaded and verified
Files already downloaded and verified


[I 2025-03-19 01:37:29,811] A new study created in memory with name: no-name-17ad5d5b-9314-4233-a066-cdd18344da28
[I 2025-03-19 01:38:47,995] Trial 0 finished with value: 0.6667 and parameters: {'num_filters1': 128, 'num_filters2': 256, 'kernel_size': 3, 'pooling': 'avg', 'fc_size': 128, 'dropout_rate': 0.49510579884377515, 'learning_rate': 0.0002962684136310157, 'negative_slope': 0.2002021884267643}. Best is trial 0 with value: 0.6667.
[I 2025-03-19 01:40:05,814] Trial 1 finished with value: 0.7308 and parameters: {'num_filters1': 32, 'num_filters2': 64, 'kernel_size': 3, 'pooling': 'max', 'fc_size': 128, 'dropout_rate': 0.2511333667432508, 'learning_rate': 0.0013823321576015332, 'negative_slope': 0.012050539375327133}. Best is trial 1 with value: 0.7308.
[I 2025-03-19 01:41:22,965] Trial 2 finished with value: 0.6946 and parameters: {'num_filters1': 32, 'num_filters2': 64, 'kernel_size': 5, 'pooling': 'avg', 'fc_size': 128, 'dropout_rate': 0.23314932298749744, 'learning_rate': 0.0031

Best hyperparameters: {'num_filters1': 32, 'num_filters2': 64, 'kernel_size': 3, 'pooling': 'max', 'fc_size': 128, 'dropout_rate': 0.2511333667432508, 'learning_rate': 0.0013823321576015332, 'negative_slope': 0.012050539375327133}


## Elu

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import optuna
import torch.nn.functional as F

# Define transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the Optuna objective function
def objective(trial):
    # Hyperparameters to tune
    num_filters1 = trial.suggest_categorical("num_filters1", [32, 64, 128])
    num_filters2 = trial.suggest_categorical("num_filters2", [64, 128, 256])
    kernel_size = trial.suggest_categorical("kernel_size", [3, 5])
    pooling_type = trial.suggest_categorical("pooling", ["max", "avg"])
    fc_size = trial.suggest_categorical("fc_size", [128, 256, 512])
    dropout_rate = trial.suggest_float("dropout_rate", 0.2, 0.5)
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    alpha = trial.suggest_float("alpha", 0.01, 1.0)  # Alpha parameter for ELU
    
    class CNN(nn.Module):
        def __init__(self):
            super(CNN, self).__init__()
            self.conv1 = nn.Conv2d(3, num_filters1, kernel_size, padding=1)
            self.conv2 = nn.Conv2d(num_filters1, num_filters2, kernel_size, padding=1)
            self.pool = nn.MaxPool2d(2, 2) if pooling_type == "max" else nn.AvgPool2d(2, 2)
            self.dropout = nn.Dropout(dropout_rate)

            # Dummy forward pass to determine `self._to_linear`
            dummy_input = torch.randn(1, 3, 32, 32)  # CIFAR-10 image size
            with torch.no_grad():
                dummy_out = self._feature_extractor(dummy_input)
                self._to_linear = dummy_out.view(1, -1).shape[1]

            self.fc1 = nn.Linear(self._to_linear, fc_size)
            self.fc2 = nn.Linear(fc_size, 10)

        def _feature_extractor(self, x):
            x = self.pool(F.elu(self.conv1(x), alpha))
            x = self.pool(F.elu(self.conv2(x), alpha))
            return x

        def forward(self, x):
            x = self._feature_extractor(x)
            x = x.view(x.size(0), -1)  # Flatten
            x = self.dropout(F.elu(self.fc1(x), alpha))
            x = self.fc2(x)
            return x

    # Initialize model, loss function, and optimizer
    model = CNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    num_epochs = 5  # Small value for quick tuning
    for epoch in range(num_epochs):
        model.train()
        for images, labels in trainloader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Evaluate the model
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = correct / total
    return accuracy  # Optuna will maximize this

# Run hyperparameter tuning
study = optuna.create_study(direction="maximize")  
study.optimize(objective, n_trials=10)  

# Print the best hyperparameters
print("Best hyperparameters:", study.best_params)


Files already downloaded and verified
Files already downloaded and verified


[I 2025-03-19 01:58:38,158] A new study created in memory with name: no-name-3c399ec2-f354-4bc4-9396-50e0b050026a
[I 2025-03-19 01:59:54,451] Trial 0 finished with value: 0.5076 and parameters: {'num_filters1': 64, 'num_filters2': 256, 'kernel_size': 5, 'pooling': 'max', 'fc_size': 128, 'dropout_rate': 0.4165034118011366, 'learning_rate': 0.004246662145521995, 'alpha': 0.9896529216312571}. Best is trial 0 with value: 0.5076.
[I 2025-03-19 02:01:10,229] Trial 1 finished with value: 0.5406 and parameters: {'num_filters1': 32, 'num_filters2': 64, 'kernel_size': 3, 'pooling': 'avg', 'fc_size': 256, 'dropout_rate': 0.2420231527040384, 'learning_rate': 0.00010767904128956026, 'alpha': 0.5239840053117389}. Best is trial 1 with value: 0.5406.
[I 2025-03-19 02:02:26,318] Trial 2 finished with value: 0.1 and parameters: {'num_filters1': 32, 'num_filters2': 128, 'kernel_size': 3, 'pooling': 'avg', 'fc_size': 128, 'dropout_rate': 0.21092927395696745, 'learning_rate': 0.009839815507054503, 'alpha':

Best hyperparameters: {'num_filters1': 128, 'num_filters2': 64, 'kernel_size': 3, 'pooling': 'max', 'fc_size': 512, 'dropout_rate': 0.2525871431125819, 'learning_rate': 0.0025973752491179067, 'alpha': 0.07234758616795293}
