In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

In [2]:
# Location of the Fashion-MNIST subset, relative to this notebook.
DATA_PATH = '../2. Dataset/fmnist_small.csv'

df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,9,0,0,0,0,0,0,0,0,0,...,0,7,0,50,205,196,213,165,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,142,142,142,21,0,3,0,0,0,0
3,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,8,0,0,0,0,0,0,0,0,0,...,213,203,174,151,188,10,0,0,0,0


In [3]:
# Column 0 is used as the target; every remaining column is a feature that is
# divided by 255.0 before training.
label_column = df.iloc[:, 0]
feature_columns = df.iloc[:, 1:]

x = feature_columns.values / 255.0
y = label_column.values

In [4]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=20,
)

In [5]:
# Features are cast to float32; labels keep whatever dtype the NumPy arrays carry.
xtrain_tensor, xtest_tensor = (
    torch.from_numpy(split).float() for split in (xtrain, xtest)
)
ytrain_tensor, ytest_tensor = (
    torch.from_numpy(split) for split in (ytrain, ytest)
)

In [6]:
class CustomDataset(Dataset):
    """Map-style dataset pairing each feature row with its label.

    Args:
        features: Indexable, sized collection of samples (for example a
            tensor whose first dimension is the sample axis).
        labels: Indexable, sized collection of targets, one per sample.

    Raises:
        ValueError: If ``features`` and ``labels`` differ in length.
    """

    def __init__(self, features, labels):
        # Fail fast: __len__ is driven by features alone, so a length mismatch
        # would otherwise surface as an IndexError mid-epoch (labels too
        # short) or silently ignore trailing labels (labels too long).
        if len(features) != len(labels):
            raise ValueError(
                "features and labels must have the same length, "
                f"got {len(features)} and {len(labels)}"
            )

        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.features[idx], self.labels[idx]


In [7]:
# Wrap each split's tensors so they can be indexed as (features, label) pairs.
train_dataset = CustomDataset(features=xtrain_tensor, labels=ytrain_tensor)
test_dataset = CustomDataset(features=xtest_tensor, labels=ytest_tensor)

In [8]:
class MyNN(nn.Module):
  """Fully connected classifier with a configurable stack of hidden blocks.

  Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout. A final
  Linear layer maps to ``output_dim`` raw scores (no softmax is applied).

  Args:
    input_dim: Number of input features per sample.
    output_dim: Number of output scores per sample.
    num_hidden_layers: Number of hidden blocks; 0 yields a single Linear layer.
    neurons_per_layer: Width of every hidden block.
    dropout_rate: Probability passed to each ``nn.Dropout``.
  """

  def __init__(self,input_dim, output_dim, num_hidden_layers, neurons_per_layer, dropout_rate):

    super().__init__()

    layers = []
    in_features = input_dim

    for _ in range(num_hidden_layers):
      layers.extend([
        nn.Linear(in_features, neurons_per_layer),
        nn.BatchNorm1d(neurons_per_layer),
        nn.ReLU(),
        nn.Dropout(dropout_rate),
      ])
      in_features = neurons_per_layer

    # Feed the output layer from the tracked width rather than from
    # neurons_per_layer, so the network is still wired correctly when there
    # are no hidden blocks (in_features is then still input_dim).
    layers.append(nn.Linear(in_features, output_dim))

    self.network = nn.Sequential(*layers)

  def forward(self, features):
    """Return the raw output scores for a batch of ``features``."""
    return self.network(features)

In [9]:
# Pick the accelerator to train on, falling back to the CPU.
device = 'cpu'

# Look the backend up on torch.backends (the object that is actually called)
# instead of testing for the unrelated torch.mps attribute.
mps_backend = getattr(torch.backends, 'mps', None)

if mps_backend is not None and mps_backend.is_available():
    device = 'mps'
    print("MPS is available")
elif torch.cuda.is_available():
    device = 'cuda'
    print("CUDA is available")

MPS is available


In [10]:
# objective function
def objective(trial):
  """Train one sampled MyNN configuration and return its accuracy.

  The trial samples the architecture (number of hidden layers, neurons per
  layer, dropout rate) and the training setup (epochs, learning rate, batch
  size, optimizer, weight decay). The model is trained on the module-level
  ``train_dataset`` and scored on the module-level ``test_dataset``, using the
  module-level ``device``.

  Args:
    trial: Optuna trial object providing the ``suggest_*`` methods.

  Returns:
    Fraction of samples in ``test_dataset`` that were classified correctly.

  Note:
    The score used for hyperparameter selection is computed on
    ``test_dataset``, so the best value found by a study is tuned to that
    split and is likely an optimistic estimate for unseen data.
  """

  # Hyperparameter values from the search space

  num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 5)
  neurons_per_layer = trial.suggest_int("neurons_per_layer", 8, 128, step=8)
  dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)

  epochs = trial.suggest_int("epochs", 10, 50, step=10)
  learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
  batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])

  optimizer_name = trial.suggest_categorical("optimizer", ['Adam', 'SGD', 'RMSprop'])
  weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)

  # Dataloaders
  # Pinned host memory only helps host-to-CUDA copies; on cpu/mps it brings no
  # benefit and PyTorch may emit a warning, so request it only for CUDA.
  pin_memory = torch.device(device).type == 'cuda'

  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=pin_memory)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=pin_memory)

  # model init
  input_dim = 784
  output_dim = 10

  model = MyNN(input_dim, output_dim, num_hidden_layers, neurons_per_layer, dropout_rate)
  model.to(device)

  # loss and optimizer selection (any name other than Adam/SGD uses RMSprop)
  criterion = nn.CrossEntropyLoss()

  optimizer_classes = {'Adam': torch.optim.Adam, 'SGD': torch.optim.SGD}
  optimizer_cls = optimizer_classes.get(optimizer_name, torch.optim.RMSprop)
  optimizer = optimizer_cls(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

  # Training loop

  model.train()  # explicit: BatchNorm/Dropout must be in training mode here

  for _ in range(epochs):

    for batch_features, batch_labels in train_loader:

      # move data to the selected device
      batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

      # forward pass
      outputs = model(batch_features)

      # calculate loss
      loss = criterion(outputs, batch_labels)

      # backward pass
      optimizer.zero_grad()
      loss.backward()

      # update the weights from the gradients
      optimizer.step()

  # Evaluation

  model.eval()

  total = 0
  correct = 0

  with torch.no_grad():

    for batch_features, batch_labels in test_loader:

      # move data to the selected device
      batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

      outputs = model(batch_features)

      # the predicted class is the index of the largest score
      predicted = outputs.argmax(dim=1)

      total += batch_labels.shape[0]
      correct += (predicted == batch_labels).sum().item()

  accuracy = correct/total

  return accuracy

In [11]:
import optuna

# Seed the sampler so its random draws are repeatable across kernel restarts.
# Model training itself is not seeded, so trial scores can still vary.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)

  from .autonotebook import tqdm as notebook_tqdm
[I 2025-12-04 12:57:33,284] A new study created in memory with name: no-name-87ffc6c3-38f9-4516-87dc-2f3110e00b17


In [12]:
# Number of hyperparameter configurations to train and evaluate.
N_TRIALS = 10

study.optimize(objective, n_trials=N_TRIALS)

[I 2025-12-04 12:57:39,633] Trial 0 finished with value: 0.7991666666666667 and parameters: {'num_hidden_layers': 2, 'neurons_per_layer': 24, 'dropout_rate': 0.30000000000000004, 'epochs': 20, 'learning_rate': 0.005278057064533982, 'batch_size': 64, 'optimizer': 'Adam', 'weight_decay': 0.0005225429583742023}. Best is trial 0 with value: 0.7991666666666667.
[I 2025-12-04 12:57:41,334] Trial 1 finished with value: 0.8091666666666667 and parameters: {'num_hidden_layers': 1, 'neurons_per_layer': 48, 'dropout_rate': 0.2, 'epochs': 10, 'learning_rate': 0.006212544066544919, 'batch_size': 64, 'optimizer': 'Adam', 'weight_decay': 8.074244676840819e-05}. Best is trial 1 with value: 0.8091666666666667.
[I 2025-12-04 12:57:47,241] Trial 2 finished with value: 0.8241666666666667 and parameters: {'num_hidden_layers': 2, 'neurons_per_layer': 104, 'dropout_rate': 0.4, 'epochs': 10, 'learning_rate': 0.00020664587333825375, 'batch_size': 32, 'optimizer': 'Adam', 'weight_decay': 0.0001044514744814507}. 

In [13]:
# Highest value returned by `objective` across the trials run so far.
study.best_value

0.8241666666666667