<a href="https://colab.research.google.com/github/anshmehta337/deep-learning/blob/main/optunaworkingonMNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

In [2]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.4-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m395.9/395.9 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.4-py3-none-any.whl (247 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m247.0/247.0 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.4 colorlog-6.9.0 optuna-4.4.0


In [3]:
import optuna

In [4]:
# Fetch MNIST into ./data (downloading when necessary); images are converted to tensors.
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

100%|██████████| 9.91M/9.91M [00:00<00:00, 35.8MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.11MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 8.32MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 4.82MB/s]


In [5]:
# Mini-batch iterators over both splits; only the training split is shuffled.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=64,
    shuffle=True,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=64,
    shuffle=False,
)

In [21]:

# Define model
class Net(nn.Module):
    """Single-hidden-layer MLP classifier: Linear -> ReLU -> Dropout -> Linear.

    Args:
        hidden_size: Width of the hidden layer.
        drop: Dropout probability applied after the ReLU.
        in_features: Number of values per flattened sample. Defaults to
            ``28 * 28``, the value previously hard-coded in this class.
        num_classes: Number of output logits. Defaults to ``10``.
    """

    def __init__(self, hidden_size, drop, in_features=28 * 28, num_classes=10):
        super().__init__()
        # Layers are created in the same order and under the same attribute
        # names as before, so state_dict keys stay fc1.* / fc2.*.
        self.fc1 = nn.Linear(in_features, hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(drop)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, in_features)`` and return the raw class logits."""
        # reshape behaves like view for contiguous input but also accepts
        # non-contiguous tensors (e.g. transposed images), where view raises.
        x = x.reshape(-1, self.fc1.in_features)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Define objective
def _train_one_epoch(model, loader, loss_fn, optimizer, device):
    """Run one optimisation pass over ``loader``, updating ``model`` in place."""
    model.train()
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = loss_fn(model(inputs), labels)
        loss.backward()
        optimizer.step()


def _evaluate_accuracy(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` labels correctly."""
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            pred = model(inputs).argmax(dim=1)
            correct += (pred == labels).sum().item()
            total += labels.size(0)
    if total == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError("evaluation loader yielded no samples")
    return correct / total


def obj(trial, epochs=5, train_loader=None, eval_loader=None):
    """Optuna objective: train a ``Net`` with sampled hyperparameters, return its error rate.

    Args:
        trial: Optuna trial used to sample ``hidden_size``, ``dr``, ``lr`` and
            ``optimizer``.
        epochs: Number of passes over the training loader. Defaults to 5.
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
            Defaults to the notebook-level ``trainloader``.
        eval_loader: Loader used to score the trained model. Defaults to the
            notebook-level ``testloader``.

    Returns:
        ``1.0 - accuracy`` on ``eval_loader``, so the study should minimise it.

    Raises:
        ValueError: If ``eval_loader`` yields no samples.

    NOTE(review): with the default ``eval_loader`` the hyperparameters are
    selected on the test split, so the best value reported by the study is an
    optimistic estimate. Pass a held-out validation loader for an unbiased
    selection.
    """
    # Fall back to the notebook-level loaders so study.optimize(obj, ...) keeps working.
    if train_loader is None:
        train_loader = trainloader
    if eval_loader is None:
        eval_loader = testloader

    # Search space (parameter names and ranges unchanged).
    hidd = trial.suggest_int('hidden_size', 127, 514)
    dr = trial.suggest_float('dr', 0.0, 0.2)
    lr = trial.suggest_float('lr', 1e-4, 1e-1, log=True)
    opt_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD'])

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    mod = Net(hidd, dr).to(device)
    loss_fn = nn.CrossEntropyLoss()
    # The sampled name is resolved to the optimizer class of the same name in torch.optim.
    optimizer = getattr(optim, opt_name)(mod.parameters(), lr=lr)

    for _ in range(epochs):
        _train_one_epoch(mod, train_loader, loss_fn, optimizer, device)

    accuracy = _evaluate_accuracy(mod, eval_loader, device)
    return 1.0 - accuracy  # minimize error

# Run Optuna
# Seed PyTorch's global RNG and the sampler so a rerun proposes the same
# hyperparameters. TPESampler is already Optuna's default for a
# single-objective study; only the seed is new.
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
torch.manual_seed(42)
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(obj, n_trials=30)

# Results
# obj returns the error rate (1 - accuracy), so convert back for reporting.
print("Best trial:")
print("  Accuracy: {:.4f}".format(1 - study.best_value))
print("  Params: ", study.best_params)


[I 2025-08-01 14:46:30,869] A new study created in memory with name: no-name-52ebdeb6-6de1-433a-adcb-e406f00bf824
[I 2025-08-01 14:47:21,339] Trial 0 finished with value: 0.44530000000000003 and parameters: {'hidden_size': 382, 'dr': 0.04042794718908396, 'lr': 0.00012560930249529457, 'optimizer': 'SGD'}. Best is trial 0 with value: 0.44530000000000003.
[I 2025-08-01 14:48:24,515] Trial 1 finished with value: 0.02059999999999995 and parameters: {'hidden_size': 485, 'dr': 0.0530568266909244, 'lr': 0.0006208747593762662, 'optimizer': 'Adam'}. Best is trial 1 with value: 0.02059999999999995.
[I 2025-08-01 14:49:10,457] Trial 2 finished with value: 0.272 and parameters: {'hidden_size': 223, 'dr': 0.18846145029464548, 'lr': 0.00043726964307582984, 'optimizer': 'SGD'}. Best is trial 1 with value: 0.02059999999999995.
[I 2025-08-01 14:50:00,868] Trial 3 finished with value: 0.4938 and parameters: {'hidden_size': 408, 'dr': 0.033111500936658426, 'lr': 0.00010323284352465884, 'optimizer': 'SGD'}

Best trial:
  Accuracy: 0.9810
  Params:  {'hidden_size': 427, 'dr': 0.06299681678736818, 'lr': 0.001470088462283412, 'optimizer': 'Adam'}
