<a href="https://colab.research.google.com/github/anirban1221/Learning_PyTorch/blob/main/ANN_using_Optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import matplotlib.pyplot as plt
import torch.optim as optim

In [2]:
# Fix the seed of PyTorch's default random number generator.
torch.manual_seed(42)

<torch._C.Generator at 0x7c754b01b1d0>

In [29]:
import pandas as pd

# Location of the Fashion-MNIST training CSV (a /content path, as used on Colab).
csv_path = '/content/fashion-mnist_train.csv'
df = pd.read_csv(csv_path)

# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
# (rows, columns) of the loaded frame.
df.shape

(60000, 785)

In [4]:
# Column 0 is taken as the target; every remaining column is a feature.
y = df.iloc[:, 0]
X = df.iloc[:, 1:]

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Divide every feature value by 255.0.
# NOTE: the results are bound back to the same names, so running this cell a
# second time divides by 255 again.
X_train, X_test = X_train / 255.0, X_test / 255.0

In [30]:
## Dataset class
class CustomDataset(Dataset):
  """In-memory dataset pairing a float32 feature tensor with int64 labels.

  Args:
    features: Array-like of per-sample feature rows; converted to ``torch.float32``.
    labels: Array-like of per-sample labels; converted to ``torch.long``.

  Raises:
    ValueError: If ``features`` and ``labels`` hold a different number of samples.
  """

  def __init__(self,features,labels):
    self.features=torch.tensor(features,dtype=torch.float32)
    self.labels=torch.tensor(labels,dtype=torch.long)

    # Fail fast on misaligned inputs: without this check a mismatch only shows
    # up later as an IndexError (too few labels) or as silently unused labels.
    if len(self.features)!=len(self.labels):
      raise ValueError(
        f"features and labels must have the same length, "
        f"got {len(self.features)} and {len(self.labels)}"
      )

  def __len__(self):
    """Return the number of samples."""
    return len(self.features)

  def __getitem__(self,index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    return self.features[index], self.labels[index]

In [31]:
# Wrap the pandas splits as tensor-backed datasets (CustomDataset takes the raw arrays).
train_dataset = CustomDataset(X_train.to_numpy(), y_train.to_numpy())
test_dataset = CustomDataset(X_test.to_numpy(), y_test.to_numpy())

In [32]:
# Number of samples in the held-out dataset.
len(test_dataset)

12000

In [33]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

print(f'using device: {device}')

using device: cuda


In [34]:
class NeuralNetwork(nn.Module):
  """Fully connected classifier with a configurable stack of hidden blocks.

  Each hidden block is ``Linear -> BatchNorm1d -> ReLU -> Dropout``; a final
  ``Linear`` layer maps to ``output_dim`` raw scores (no softmax is applied).

  Args:
    input_dim: Number of input features per sample.
    output_dim: Number of output scores per sample.
    num_hidden_layers: How many hidden blocks to stack; ``0`` yields a single
      ``Linear(input_dim, output_dim)`` layer.
    neurons_per_layer: Width of every hidden block.
    dropout_rate: Dropout probability used in every hidden block.
  """

  def __init__(self,input_dim,output_dim,num_hidden_layers,neurons_per_layer,dropout_rate):
    super().__init__()
    layers=[]

    # Width of whatever feeds the next Linear layer; starts at the raw input.
    in_features=input_dim
    for _ in range(num_hidden_layers):
      layers.append(nn.Linear(in_features,neurons_per_layer))
      layers.append(nn.BatchNorm1d(neurons_per_layer))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(dropout_rate))
      in_features=neurons_per_layer

    # Use the tracked width (not neurons_per_layer) so the output layer also
    # matches the input when there are no hidden blocks.
    layers.append(nn.Linear(in_features,output_dim))

    self.model=nn.Sequential(*layers)

  def forward(self,x):
    """Run ``x`` through the layer stack and return the raw output scores."""
    return self.model(x)

In [41]:
## Define the objective function that Optuna will optimize
def objective(trial):
  """Train one candidate network and return its accuracy as the trial's score.

  Draws architecture and optimisation hyperparameters from ``trial``, trains a
  fresh ``NeuralNetwork`` on ``train_dataset`` for the sampled number of epochs,
  and measures classification accuracy on ``test_dataset``.

  Args:
    trial: Optuna trial object used to sample the hyperparameter values.

  Returns:
    Fraction of ``test_dataset`` samples whose predicted class equals the label.
  """
  # NOTE(review): the score is measured on ``test_dataset``, so the selected
  # hyperparameters are tuned against that split; a separate validation split
  # would be needed for an unbiased final estimate.

  ## choosing next hyperparameter value from the search space:
  # The string names below are the keys recorded by the study; keep them stable.
  num_hidden_layers=trial.suggest_int("num_hidden_layers",1,5)
  neurons_per_layer=trial.suggest_int("neurons_per_layers",8,128,step=8)
  epochs=trial.suggest_int("epochs",10,50)
  learning_rate=trial.suggest_float("learning_rate",1e-5,1e-1,log=True)
  dropout_rate=trial.suggest_float("dropout_rate",0.1,0.5,step=0.1)
  batch_size=trial.suggest_categorical("batch_size",[16,32,64,128])
  optimizer_name=trial.suggest_categorical("optimizer_name",['Adam',"SGD","RMSprop"])
  weight_decay=trial.suggest_float("weight_decay",1e-5,1e-3,log=True)

  train_loader=DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
  # Accuracy does not depend on sample order, so the evaluation loader is not shuffled.
  test_loader=DataLoader(test_dataset,batch_size=batch_size,shuffle=False)

  input_dim=784
  output_dim=10

  model=NeuralNetwork(input_dim,output_dim,num_hidden_layers,neurons_per_layer,dropout_rate)
  model.to(device)

  ## optimizer selection
  criterion=nn.CrossEntropyLoss()
  if optimizer_name == 'Adam':
    optimizer=optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
  elif optimizer_name == 'SGD':
    optimizer=optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
  else:
    optimizer=optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

  ## training_loop
  # Make the training mode explicit so BatchNorm/Dropout behave as training layers.
  model.train()
  for _ in range(epochs):
    for batch_features,batch_labels in train_loader:
      batch_features=batch_features.to(device)
      batch_labels=batch_labels.to(device)
      ## clear gradients left over from the previous step
      optimizer.zero_grad()
      ## forward pass
      outputs=model(batch_features)
      ## calculate loss
      loss=criterion(outputs,batch_labels)
      ## back pass
      loss.backward()
      ## update weights
      optimizer.step()

  ##evaluation:
  model.eval()

  total=0
  correct=0

  with torch.no_grad():
    for batch_features,batch_labels in test_loader:
      batch_features,batch_labels=batch_features.to(device),batch_labels.to(device)
      outputs=model(batch_features)
      # Predicted class = index of the largest output score per sample.
      predicted=outputs.argmax(dim=1)
      total+=batch_labels.shape[0]
      correct+=(predicted==batch_labels).sum().item()

  accuracy=correct/total
  return accuracy




In [12]:
# %pip installs into the running kernel's environment; the version is pinned
# to the one this notebook was executed with.
%pip install -q optuna==4.3.0

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


In [39]:
import optuna

# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# across kernel restarts; accuracy is the objective, so the study maximizes it.
study=optuna.create_study(
  direction='maximize',
  sampler=optuna.samplers.TPESampler(seed=42),
)

[I 2025-05-13 09:52:45,392] A new study created in memory with name: no-name-d1e33858-8deb-4dc3-acbd-982c6203cca3


In [42]:
# Run 10 trials; every trial calls `objective`, which trains a new model from scratch.
study.optimize(objective,n_trials=10)

[I 2025-05-13 09:58:21,443] Trial 1 finished with value: 0.8659166666666667 and parameters: {'num_hidden_layers': 3, 'neurons_per_layers': 88, 'epochs': 43, 'learning_rate': 1.8350623951430767e-05, 'dropout_rate': 0.30000000000000004, 'batch_size': 128, 'optimizer_name': 'RMSprop', 'weight_decay': 3.496311531874648e-05}. Best is trial 1 with value: 0.8659166666666667.
[I 2025-05-13 10:00:49,187] Trial 2 finished with value: 0.88175 and parameters: {'num_hidden_layers': 4, 'neurons_per_layers': 56, 'epochs': 43, 'learning_rate': 0.005763922175014464, 'dropout_rate': 0.2, 'batch_size': 64, 'optimizer_name': 'SGD', 'weight_decay': 0.0007178847183226318}. Best is trial 2 with value: 0.88175.
[I 2025-05-13 10:01:26,105] Trial 3 finished with value: 0.87725 and parameters: {'num_hidden_layers': 4, 'neurons_per_layers': 56, 'epochs': 16, 'learning_rate': 0.0007405594840116127, 'dropout_rate': 0.2, 'batch_size': 128, 'optimizer_name': 'Adam', 'weight_decay': 0.0005878764670596082}. Best is tri

In [43]:
# Best objective value (accuracy returned by `objective`) among the trials run so far.
study.best_value

0.8919166666666667

In [44]:
# Hyperparameter values of the best trial.
study.best_params

{'num_hidden_layers': 3,
 'neurons_per_layers': 104,
 'epochs': 22,
 'learning_rate': 0.000850522648722386,
 'dropout_rate': 0.2,
 'batch_size': 64,
 'optimizer_name': 'RMSprop',
 'weight_decay': 4.3086223516571836e-05}

In [None]:
## Experiment tracking with MLflow: we have to find out the reliable parameters
## using this method.