<a href="https://colab.research.google.com/github/YashsTiwari/ML_DL_revision/blob/main/Using_Optuna_for_an_ANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt

In [6]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [7]:
# Seed PyTorch's global random number generator so PyTorch-side randomness is repeatable.
torch.manual_seed(42)

<torch._C.Generator at 0x7c47c6b0b7f0>

In [8]:
# Load the Fashion-MNIST training CSV (a label column followed by pixel1..pixel784).
# NOTE(review): this was previously described as a 6k-image subset, but the trial
# accuracies logged by the study (e.g. 0.76925) are only possible if the 20% test
# split is a multiple of 12,000 rows — confirm which file is actually in use.
df=pd.read_csv('fashion-mnist_train.csv')
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# Column 0 holds the class label; every remaining column is a pixel feature.
X=df.iloc[:,1:].values
y=df.iloc[:,0].values

In [10]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
# Scale pixel intensities into [0, 1] (values are assumed to be raw 0-255 bytes).
# The guard keeps this cell idempotent: once an array has been scaled its maximum
# is <= 1, so re-running the cell does not divide by 255 a second time.
PIXEL_MAX=255.0
if X_train.size and X_train.max()>1.0:
  X_train=X_train/PIXEL_MAX
if X_test.size and X_test.max()>1.0:
  X_test=X_test/PIXEL_MAX

In [12]:
class CustomDataset(Dataset):
  """In-memory dataset pairing feature rows with integer class labels.

  Both inputs are copied into tensors once, at construction time, so indexing
  afterwards only slices those tensors.

  Args:
    features: array-like of numeric values, one sample per row; stored as float32.
    labels: array-like of integer class ids, one per sample; stored as int64.

  Raises:
    ValueError: if `features` and `labels` hold a different number of samples.
  """
  def __init__(self,features,labels):
    self.features=torch.tensor(features,dtype=torch.float32)
    self.labels=torch.tensor(labels,dtype=torch.long)

    # Fail fast on misaligned inputs instead of surfacing an index error (or a
    # silently wrong pairing) in the middle of training.
    if self.features.shape[:1]!=self.labels.shape[:1]:
      raise ValueError(
        f"features and labels must have the same number of samples, "
        f"got {tuple(self.features.shape)} and {tuple(self.labels.shape)}"
      )

  def __len__(self):
    """Return the number of samples."""
    return len(self.features)

  def __getitem__(self, index):
    """Return the `(features, label)` pair stored at `index`."""
    return self.features[index], self.labels[index]

In [13]:
# Wrap the train/test arrays as tensor-backed datasets for the DataLoaders.
train_dataset=CustomDataset(X_train,y_train)
test_dataset=CustomDataset(X_test,y_test)

In [14]:
class MyNN(nn.Module):
  """Fully connected classifier with a configurable stack of hidden blocks.

  Every hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout and all hidden
  blocks share the same width. A final Linear layer maps to `output_dim` raw
  scores; no softmax is applied inside the model.

  Args:
    input_dim: number of input features per sample.
    output_dim: number of output scores per sample.
    num_hidden_layer: how many hidden blocks to stack; 0 yields a single Linear
      layer from `input_dim` straight to `output_dim`.
    neurons_per_layer: width of every hidden block.
    dropout_rate: drop probability passed to `nn.Dropout` in each block.
  """
  def __init__(self, input_dim, output_dim, num_hidden_layer, neurons_per_layer, dropout_rate):
    super().__init__()
    layers=[]

    # Width of whatever feeds the next Linear layer; starts at the input size.
    in_features=input_dim
    for _ in range(num_hidden_layer):
      layers.append(nn.Linear(in_features, neurons_per_layer))
      layers.append(nn.BatchNorm1d(neurons_per_layer))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(dropout_rate))
      in_features=neurons_per_layer

    # Use the tracked width (not neurons_per_layer) so the output layer is still
    # shaped correctly when there are no hidden blocks at all.
    layers.append(nn.Linear(in_features, output_dim))
    self.model=nn.Sequential(*layers)

  def forward(self, x):
    """Run `x` through the layer stack and return the raw output scores."""
    return self.model(x)


In [15]:
def objective(trail):
    """Optuna objective: train one sampled MyNN configuration and return its accuracy.

    Samples the architecture (depth, width, dropout), the optimiser (type,
    learning rate, weight decay) and the schedule (epochs, batch size) from the
    trial, trains on `train_dataset`, then scores on `test_dataset`.

    NOTE(review): the score is measured on `test_dataset`, so the study selects
    hyper-parameters against the same data it reports on and the best value is
    an optimistic estimate. Consider scoring on a separate validation split.

    Args:
        trail: the Optuna trial; only its `suggest_*` methods are used. (The
            parameter name is kept as-is so existing callers are unaffected.)

    Returns:
        Fraction of `test_dataset` samples classified correctly, in [0, 1].
    """
    # Keep the suggest_* calls in this order: samplers draw parameters sequentially.
    num_hidden_layers=trail.suggest_int('num_hidden_layers',1,5)
    neurons_per_layer=trail.suggest_int('neurons_per_layer',8,128,step=8)
    epochs=trail.suggest_int('epochs',10,50,step=10)
    lr=trail.suggest_float('lr',1e-5,1e-1,log=True)
    dropout_rate=trail.suggest_float('dropout_rate',0.1,0.5,step=0.1)
    batch_size=trail.suggest_categorical('batch_size',[16,32,64,128])
    optimiser_name=trail.suggest_categorical('optimiser',['Adam','SGD', 'RMSprop'])
    weight_decay=trail.suggest_float('weight_decay',1e-5,1e-3,log=True)

    # Pinned host memory is only useful for host->GPU copies; requesting it on a
    # CPU-only machine just produces a warning.
    use_cuda=device.type=='cuda'
    # BatchNorm1d cannot normalise a one-sample batch in training mode, so drop the
    # trailing batch only in the case where it would contain exactly one sample.
    drop_singleton=len(train_dataset)%batch_size==1

    train_loader=DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                            pin_memory=use_cuda, drop_last=drop_singleton)
    test_loader=DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                           pin_memory=use_cuda)

    input_dim=784
    output_dim=10

    model=MyNN(input_dim, output_dim, num_hidden_layers, neurons_per_layer, dropout_rate)
    model.to(device)

    criterion=nn.CrossEntropyLoss()

    optimiser_classes={'Adam': optim.Adam, 'SGD': optim.SGD, 'RMSprop': optim.RMSprop}
    optimizer=optimiser_classes[optimiser_name](
        model.parameters(), lr=lr, weight_decay=weight_decay)

    model.train()
    for epoch in range(epochs):
        for batch_features, batch_labels in train_loader:
            # non_blocking lets the copy overlap with compute when the source is pinned.
            batch_features=batch_features.to(device, non_blocking=True)
            batch_labels=batch_labels.to(device, non_blocking=True)

            optimizer.zero_grad()
            loss=criterion(model(batch_features), batch_labels)
            loss.backward()
            optimizer.step()

    # Evaluation: dropout off, batch-norm uses running statistics, no gradients.
    model.eval()
    total=0
    correct=0

    with torch.no_grad():
        for batch_features, batch_labels in test_loader:
            batch_features=batch_features.to(device, non_blocking=True)
            batch_labels=batch_labels.to(device, non_blocking=True)

            predicted=model(batch_features).argmax(dim=1)
            total+=batch_labels.shape[0]
            correct+=(predicted==batch_labels).sum().item()

    return correct/total


In [16]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m395.9/395.9 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.2-py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.7/242.7 kB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.2 colorlog-6.9.0 optuna-4.4.0


In [17]:
import optuna

# Seed the sampler as well as torch: without it every run proposes a different
# sequence of hyper-parameters. TPESampler is Optuna's default single-objective
# sampler, so only the seeding changes. (Exact accuracies can still vary with
# non-deterministic GPU kernels.)
study=optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=10)

[I 2025-06-26 13:18:27,768] A new study created in memory with name: no-name-4181cd68-ac5f-401f-95da-55b40fe0f33c
[I 2025-06-26 13:19:41,416] Trial 0 finished with value: 0.76925 and parameters: {'num_hidden_layers': 3, 'neurons_per_layer': 96, 'epochs': 50, 'lr': 0.0066522424942910665, 'dropout_rate': 0.1, 'batch_size': 128, 'optimiser': 'RMSprop', 'weight_decay': 0.0006122346992458529}. Best is trial 0 with value: 0.76925.
[I 2025-06-26 13:20:53,858] Trial 1 finished with value: 0.8678333333333333 and parameters: {'num_hidden_layers': 3, 'neurons_per_layer': 96, 'epochs': 10, 'lr': 0.02069016711075164, 'dropout_rate': 0.4, 'batch_size': 16, 'optimiser': 'SGD', 'weight_decay': 0.0005314717463192598}. Best is trial 1 with value: 0.8678333333333333.
[I 2025-06-26 13:22:05,025] Trial 2 finished with value: 0.848 and parameters: {'num_hidden_layers': 1, 'neurons_per_layer': 32, 'epochs': 50, 'lr': 0.0013515633464726148, 'dropout_rate': 0.5, 'batch_size': 64, 'optimiser': 'SGD', 'weight_de

In [18]:
# Highest objective value (accuracy returned by `objective`) across all trials.
study.best_value

0.888

In [19]:
# Hyper-parameter values of the best-scoring trial.
study.best_params

{'num_hidden_layers': 2,
 'neurons_per_layer': 72,
 'epochs': 50,
 'lr': 3.7521545573532974e-05,
 'dropout_rate': 0.2,
 'batch_size': 32,
 'optimiser': 'Adam',
 'weight_decay': 4.6890475190145486e-05}