<a href="https://colab.research.google.com/github/jay-kanakia/GenAI/blob/main/09_Pytorch_project_Optuna_ann_fashion_mnist_gpu_optimized.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset,DataLoader
import torch.nn as nn

In [2]:
pip install optuna

Collecting optuna
  Downloading optuna-4.7.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.7.0-py3-none-any.whl (413 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m413.9/413.9 kB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.7.0


In [3]:
import optuna

In [4]:
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!kaggle datasets download -d zalando-research/fashionmnist
!unzip fashionmnist.zip -d data/

Dataset URL: https://www.kaggle.com/datasets/zalando-research/fashionmnist
License(s): other
Downloading fashionmnist.zip to /content
 77% 53.0M/68.8M [00:00<00:00, 553MB/s]
100% 68.8M/68.8M [00:00<00:00, 538MB/s]
Archive:  fashionmnist.zip
  inflating: data/fashion-mnist_test.csv  
  inflating: data/fashion-mnist_train.csv  
  inflating: data/t10k-images-idx3-ubyte  
  inflating: data/t10k-labels-idx1-ubyte  
  inflating: data/train-images-idx3-ubyte  
  inflating: data/train-labels-idx1-ubyte  


In [5]:
# One seed for every RNG the notebook draws from:
# NumPy backs pandas' DataFrame.sample, torch drives weight init, dropout and shuffling.
SEED=42
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7d4aa78a1a70>

In [6]:
# Read from the same relative directory the archive was extracted to (`unzip ... -d data/`)
# instead of a Colab-specific absolute path.
DATA_DIR='data'
train_data=pd.read_csv(f'{DATA_DIR}/fashion-mnist_train.csv')
test_data=pd.read_csv(f'{DATA_DIR}/fashion-mnist_test.csv')

# Peek at two random rows: a `label` column followed by pixel1..pixel784.
train_data.sample(2)

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
15093,3,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
45599,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,11,19,12,0,0,0


In [7]:
# Use the GPU when one is visible, otherwise fall back to the CPU so the notebook still runs end to end.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
# Column 0 is the class label; every remaining column is a pixel feature.
y_train,y_test=(frame.iloc[:,0].to_numpy() for frame in (train_data,test_data))
X_train,X_test=(frame.iloc[:,1:].to_numpy() for frame in (train_data,test_data))

In [9]:
# Scale pixels by 1/255 (assumes 8-bit intensities, i.e. raw values in 0..255) and cast to float32.
# The scaling happens inside the expression rather than by reassigning X_train/X_test,
# so re-running this cell cannot divide the data by 255 a second time.
X_train_tensor=torch.from_numpy((X_train/255).astype(np.float32))
X_test_tensor=torch.from_numpy((X_test/255).astype(np.float32))

# Class indices as int64, the target dtype nn.CrossEntropyLoss expects.
y_train_tensor=torch.from_numpy(y_train).long()
y_test_tensor=torch.from_numpy(y_test).long()

In [10]:
class MyDataset(Dataset):
  """Map-style dataset pairing a feature container with its labels.

  Args:
    features: indexable container exposing ``.shape`` (e.g. a tensor); row ``i`` is sample ``i``.
    labels: indexable container with one entry per row of ``features``.

  Raises:
    ValueError: if ``features`` and ``labels`` do not have the same length.
  """

  def __init__(self,features,labels):

    # Fail early: a length mismatch would otherwise surface later as an IndexError
    # (too few labels) or silently ignore the surplus labels.
    if len(features)!=len(labels):
      raise ValueError(
        f'features and labels must have the same length, got {len(features)} and {len(labels)}'
      )

    self.features=features
    self.labels=labels

  def __len__(self):
    """Number of samples (rows of ``features``)."""
    return self.features.shape[0]

  def __getitem__(self,index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    return self.features[index],self.labels[index]

In [11]:
# Wrap the train and test tensors so DataLoader can index them as (features, label) pairs.
train_dataset,test_dataset=(
  MyDataset(X_train_tensor,y_train_tensor),
  MyDataset(X_test_tensor,y_test_tensor),
)

In [12]:
class MyNN(nn.Module):
  """Fully connected network made of ``num_layers`` identical hidden blocks.

  Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout. A final Linear layer
  maps to ``output_dim`` raw scores; no softmax is applied in this module.

  Args:
    num_layers: number of hidden blocks (0 yields a single Linear layer).
    num_neuron_per_layer: width of every hidden block.
    dropout_rate: probability ``p`` passed to each ``nn.Dropout``.
    input_dim: number of input features.
    output_dim: number of output scores.
  """

  def __init__(self,num_layers,num_neuron_per_layer,dropout_rate,input_dim,output_dim):

    super().__init__()

    layers=[]
    in_features=input_dim  # width of whatever feeds the next Linear layer

    for _ in range(num_layers):
      layers.extend([
        nn.Linear(in_features,num_neuron_per_layer),
        nn.BatchNorm1d(num_neuron_per_layer),
        nn.ReLU(),
        nn.Dropout(p=dropout_rate),
      ])
      in_features=num_neuron_per_layer

    # Size the output layer from the tracked width rather than from num_neuron_per_layer,
    # so the network is still shaped correctly when there are no hidden blocks.
    layers.append(nn.Linear(in_features,output_dim))

    self.model=nn.Sequential(*layers)

  def forward(self,X):
    """Run ``X`` through the sequential stack and return the raw output scores."""
    return self.model(X)


In [13]:
def objective(trial):
  """Optuna objective: train one sampled MyNN configuration and return its accuracy.

  Samples the architecture (depth, width, dropout) and the training setup (epochs,
  learning rate, weight decay, optimizer, batch size) from ``trial``, trains on
  ``train_dataset`` and scores the result on ``test_dataset``.

  NOTE(review): the returned score is measured on the test split, so the study selects
  hyperparameters against the test data. Holding out a validation split for the search
  would keep the test accuracy an unbiased final estimate.

  Args:
    trial: the ``optuna`` trial used to sample hyperparameters.

  Returns:
    Fraction of correctly classified samples in ``test_dataset`` (float in [0, 1]).

  Raises:
    ValueError: if the sampled optimizer name is not one of the supported choices.
  """

  num_layers=trial.suggest_int('num_layers',2,10)
  num_neuron_per_layer=trial.suggest_int('num_neurons_layers',32,128,step=32)
  dropout_rate=trial.suggest_float('dropout_rate',0.1,0.5,step=0.1)
  epochs=trial.suggest_int('epochs',150,300,step=50)
  learning_rate=trial.suggest_float('learning_rate',1e-4,1e-1,log=True)
  weight_decay=trial.suggest_float('weight_decay',1e-5,1e-3,log=True)
  optimizer_name=trial.suggest_categorical('optimizer_name',['Adam','RMSProp','SGD'])
  batch_size=trial.suggest_int('batch_size',64,256,step=64)

  # Pinned host memory and asynchronous copies only pay off for host-to-GPU transfers.
  use_cuda=device.type=='cuda'

  train_loader=DataLoader(train_dataset,batch_size=batch_size,shuffle=True,pin_memory=use_cuda)
  test_loader=DataLoader(test_dataset,batch_size=batch_size,shuffle=False,pin_memory=use_cuda)

  input_dim=784
  output_dim=10
  model=MyNN(num_layers,num_neuron_per_layer,dropout_rate,input_dim,output_dim)
  model=model.to(device)
  loss_function=nn.CrossEntropyLoss()

  if optimizer_name=='Adam':
    optimizer=torch.optim.Adam(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
  elif optimizer_name=='RMSProp':
    optimizer=torch.optim.RMSprop(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
  elif optimizer_name=='SGD':
    optimizer=torch.optim.SGD(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
  else:
    # Keeps a future edit of the choices list from failing later with an unbound `optimizer`.
    raise ValueError(f'Unsupported optimizer: {optimizer_name}')

  # Explicit training mode: dropout active, batch norm using per-batch statistics.
  model.train()

  for epoch in range(epochs):
    for batch_features,batch_labels in train_loader:
      batch_features=batch_features.to(device,non_blocking=use_cuda)
      batch_labels=batch_labels.to(device,non_blocking=use_cuda)

      y_pred=model(batch_features)

      loss=loss_function(y_pred,batch_labels)

      optimizer.zero_grad()

      loss.backward()

      optimizer.step()


  # Evaluation mode: dropout disabled, batch norm using its running statistics.
  model.eval()

  total=0
  correct=0
  with torch.no_grad():
    for batch_features,batch_labels in test_loader:
      batch_features=batch_features.to(device,non_blocking=use_cuda)
      batch_labels=batch_labels.to(device,non_blocking=use_cuda)

      y_pred=model(batch_features)
      # Predicted class = index of the largest score in each row.
      y_pred=torch.argmax(y_pred,dim=1)

      total = total + batch_labels.shape[0]

      correct = correct + (y_pred == batch_labels).sum().item()

    accuracy = correct/total

  return accuracy


In [14]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable across runs
# (same value as the torch seed set earlier in the notebook).
study=optuna.create_study(direction='maximize',sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective,n_trials=10)

[I 2026-01-22 03:12:31,113] A new study created in memory with name: no-name-a13bacfe-810f-455c-9e5d-f062b21418ff
[I 2026-01-22 03:20:31,639] Trial 0 finished with value: 0.8782 and parameters: {'num_layers': 7, 'num_neurons_layers': 64, 'dropout_rate': 0.1, 'epochs': 250, 'learning_rate': 0.0039056768569006683, 'weight_decay': 1.584184356856342e-05, 'optimizer_name': 'Adam', 'batch_size': 256}. Best is trial 0 with value: 0.8782.
[I 2026-01-22 03:27:17,124] Trial 1 finished with value: 0.8792 and parameters: {'num_layers': 3, 'num_neurons_layers': 32, 'dropout_rate': 0.2, 'epochs': 250, 'learning_rate': 0.00014886003949046226, 'weight_decay': 2.7723995959816137e-05, 'optimizer_name': 'Adam', 'batch_size': 192}. Best is trial 1 with value: 0.8792.
[I 2026-01-22 03:34:38,891] Trial 2 finished with value: 0.889 and parameters: {'num_layers': 7, 'num_neurons_layers': 64, 'dropout_rate': 0.1, 'epochs': 250, 'learning_rate': 0.008421080120280849, 'weight_decay': 0.0001612775820743831, 'opti

In [16]:
# Hyperparameter values of the best trial in the study (the one with the highest returned accuracy).
study.best_params

{'num_layers': 7,
 'num_neurons_layers': 96,
 'dropout_rate': 0.30000000000000004,
 'epochs': 300,
 'learning_rate': 0.0005916502567433015,
 'weight_decay': 0.00012897301011239107,
 'optimizer_name': 'Adam',
 'batch_size': 256}

In [18]:
# Full record of the best trial: number, state, value, timestamps, params and their distributions.
study.best_trial

FrozenTrial(number=8, state=<TrialState.COMPLETE: 1>, values=[0.8954], datetime_start=datetime.datetime(2026, 1, 22, 4, 26, 32, 365865), datetime_complete=datetime.datetime(2026, 1, 22, 4, 35, 50, 196049), params={'num_layers': 7, 'num_neurons_layers': 96, 'dropout_rate': 0.30000000000000004, 'epochs': 300, 'learning_rate': 0.0005916502567433015, 'weight_decay': 0.00012897301011239107, 'optimizer_name': 'Adam', 'batch_size': 256}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'num_layers': IntDistribution(high=10, log=False, low=2, step=1), 'num_neurons_layers': IntDistribution(high=128, log=False, low=32, step=32), 'dropout_rate': FloatDistribution(high=0.5, log=False, low=0.1, step=0.1), 'epochs': IntDistribution(high=300, log=False, low=150, step=50), 'learning_rate': FloatDistribution(high=0.1, log=True, low=0.0001, step=None), 'weight_decay': FloatDistribution(high=0.001, log=True, low=1e-05, step=None), 'optimizer_name': CategoricalDistribution(choices=('

In [23]:
# Objective value (accuracy returned by `objective`) achieved by the best trial.
study.best_value

0.8954