In [27]:
import torch
import torch.nn as nn
import torchvision
from torchinfo import summary
from torch.utils.data import DataLoader

In [28]:
transform_train = torchvision.transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))
])

transform_test = torchvision.transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010))
])


In [29]:
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=transform)

test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=True,
                                            transform=transform_test)


In [36]:
classes = train_dataset.classes
print(classes)


['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [35]:
img, label = train_dataset[0]
print(img.shape, label)

torch.Size([3, 32, 32]) 6


In [31]:
device = 'cpu'
if hasattr(torch,'mps') and torch.backends.mps.is_available():
    device = 'mps'
    print("MPS is available")

MPS is available


In [32]:
class SimpleCNN(nn.Module):

    def __init__(self,in_channels = 3,num_classes=10,dropout = 0.5):

        super(SimpleCNN, self).__init__()
        
        self.features = nn.Sequential(

            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1),  # (32x32x32)
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2,2),  # (32x16x16)
            
            nn.Conv2d(32, 64, kernel_size=3, padding=1), # (64x16x16)
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2,2),  # (64x8x8)
            
            nn.Conv2d(64, 128, kernel_size=3, padding=1), # (128x8x8)
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2,2)   # (128x4x4)

        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128*4*4, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, num_classes)
        )
    
    def forward(self, x):

        x = self.features(x)
        x = self.classifier(x)
        
        return x

In [38]:

def objective(trial):


    # Hyperparameters to tune

    dropout_rate = trial.suggest_uniform('dropout_rate', 0.2, 0.5)
    
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-5, 1e-2)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)

    optimizer_name = trial.suggest_categorical('optimizer', ['SGD', 'Adam', 'RMSprop'])
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    num_epochs = trial.suggest_int('num_epochs', 10, 30)

    num_channels = 3
    num_classes = 10
    
    model = SimpleCNN(num_channels,num_classes,dropout_rate).to(device)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                           shuffle=True)

    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                          shuffle=False)


    # Optimizer
    if optimizer_name == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer_name == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer_name == 'RMSprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Loss function
    criterion = nn.CrossEntropyLoss()

    # Training loop
    for epoch in range(num_epochs):
        
        model.train()

        for batch_features, batch_labels in train_loader:

            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
            
            optimizer.zero_grad()
            
            outputs = model(batch_features)

            loss = criterion(outputs, batch_labels)
            
            loss.backward()
            
            optimizer.step()


    # Validation loop
    model.eval()  # how the model layers behave like disable dropout and uses running mean and variance.

    correct = 0
    total = 0
    
    with torch.no_grad():

        for batch_features, batch_labels in test_loader:
        
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
        
            outputs = model(batch_features)
            _, predicted = torch.max(outputs, 1)
        
            total += batch_labels.size()[0]
            correct += (predicted == batch_labels).sum().item()

    accuracy = correct / total
    return accuracy

In [39]:
import optuna

pruner = optuna.pruners.MedianPruner() 
# A pruner in Optuna is a smart early-stopping strategy.
# It stops training of unpromising trials to save time.

study = optuna.create_study(direction='maximize', pruner=pruner)
study.optimize(objective, n_trials=5) 


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-12-07 20:55:23,864] A new study created in memory with name: no-name-a85aeb58-5b8d-4e7d-88a2-6ad1db5f9707
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.2, 0.5)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-5, 1e-2)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
[I 2025-12-07 21:01:15,402] Trial 0 finished with value: 0.7192 and parameters: {'dropout_rate': 0.3082117547208877, 'weight_decay': 9.319508743623131e-05, 'learning_rate': 0.006240332038931586, 'optimizer': 'SGD', 'batch_size': 64, 'num_epochs': 30}. Best is trial 0 with value: 0.7192.
[I 2025-12-07 21:06:34,981] Trial 1 finished with value: 0.823 and parameters: {'dropout_rate': 0.2827641169772, 'weight_decay': 9.369769632110015e-05, 'learning_rate': 0.0002464357490261473, 'optimizer': 'Adam', 'batch_size': 32, 'num_epochs': 26}. Best is trial 1 with value: 0.823.
[I 2025-12-07 21:10:34,957] Trial 2 finished with value:

In [40]:
print("Best hyperparameters:", study.best_params)
print("Best accuracy:", study.best_value)

Best hyperparameters: {'dropout_rate': 0.2827641169772, 'weight_decay': 9.369769632110015e-05, 'learning_rate': 0.0002464357490261473, 'optimizer': 'Adam', 'batch_size': 32, 'num_epochs': 26}
Best accuracy: 0.823
