In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.6.0


In [2]:
import optuna
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [3]:
# Run on the GPU when one is visible to PyTorch; otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [4]:
import kagglehub
from kagglehub import KaggleDatasetAdapter

# CSV file to read from the Kaggle dataset "zalando-research/fashionmnist".
file_path = "fashion-mnist_train.csv"

# `kagglehub.load_dataset` is deprecated (it emits a DeprecationWarning);
# `kagglehub.dataset_load` is its replacement and takes the same arguments:
# adapter, dataset handle, and the path of the file inside the dataset.
df = kagglehub.dataset_load(
  KaggleDatasetAdapter.PANDAS,
  "zalando-research/fashionmnist",
  file_path
)

  df = kagglehub.load_dataset(


Using Colab cache for faster access to the 'fashionmnist' dataset.


In [5]:
# Column 0 is used as the target; every remaining column is used as a feature.
x = df.iloc[:, 1:].values
y = df.iloc[:, 0].values

In [6]:
# Shuffled, class-stratified hold-out split with a fixed seed so the split is
# repeatable. No test_size is given, so scikit-learn's default proportion applies.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    stratify=y,
    shuffle=True,
    random_state=42,
)

In [7]:
# Augmentation pipeline applied to training images only: a random rotation of
# up to 10 degrees followed by a random horizontal flip (default probability).
train_augment = transforms.Compose([
    transforms.RandomRotation(degrees=10),
    transforms.RandomHorizontalFlip(p=0.5),
])

In [8]:
class CustomDataset(Dataset):
  """In-memory image dataset built from flat feature rows and integer labels.

  The features are converted to a float tensor and reshaped to
  (-1, 1, 28, 28); the labels are converted to a long tensor. Pixel values
  are stored as given (no scaling is applied here).

  Args:
    features: array-like of rows that can be reshaped to (-1, 1, 28, 28).
    labels: array-like of integer class labels, one per row.
    transform: optional callable applied to each image tensor on access.
  """

  def __init__(self, features, labels, transform=None):
    pixels = torch.tensor(features, dtype=torch.float)
    self.features = pixels.reshape(-1, 1, 28, 28)
    self.labels = torch.tensor(labels, dtype=torch.long)
    self.transform = transform

  def __len__(self):
    """Number of samples (size of the leading dimension of the image tensor)."""
    return self.features.shape[0]

  def __getitem__(self, idx):
    """Return `(image, label)` for `idx`, with the transform applied to the image if one is set."""
    image = self.features[idx]
    target = self.labels[idx]
    if not self.transform:
      return image, target
    return self.transform(image), target


In [9]:
# The held-out split is wrapped without any transform; only the training split
# gets the random augmentation pipeline.
testdataset = CustomDataset(xtest, ytest)
traindataset = CustomDataset(xtrain, ytrain, transform=train_augment)

In [28]:
class myCNN(nn.Module):
  """Configurable CNN classifier: a stack of conv blocks followed by fully connected blocks.

  Each conv block is Conv2d(padding='same') -> BatchNorm2d -> activation ->
  MaxPool2d(2, 2). Each fully connected block is Linear -> BatchNorm1d ->
  activation -> Dropout. A final Linear layer produces 10 outputs.

  Args:
    input_features: number of channels of the input images.
    filter_per_layer: output channel count for each conv block.
    filter_size: kernel size for each conv block (paired with
      `filter_per_layer` via `zip`, so extra entries in the longer one are ignored).
    activation: name of an activation class in `torch.nn` (e.g. 'ReLU').
    neurons_per_layer: width of each hidden fully connected layer.
    dropout_per_layer: dropout probability for each hidden fully connected
      layer (paired with `neurons_per_layer` via `zip`).
    image_size: `(height, width)` of the input images, used only to work out
      the size of the flattened feature map. Defaults to (28, 28).
  """

  def __init__(self, input_features, filter_per_layer, filter_size, activation, neurons_per_layer, dropout_per_layer, image_size=(28, 28)):
    super().__init__()

    activation_cls = getattr(nn, activation)

    # Keep the real input depth around: `channels` is advanced layer by layer
    # below, and the shape probe must be fed the original number of channels.
    in_channels = input_features

    conv_blocks = []
    channels = in_channels
    for filters, size in zip(filter_per_layer, filter_size):
      conv_blocks.append(nn.Conv2d(channels, filters, size, padding='same'))
      conv_blocks.append(nn.BatchNorm2d(filters))
      conv_blocks.append(activation_cls())
      conv_blocks.append(nn.MaxPool2d(2, 2))
      channels = filters

    self.features = nn.Sequential(*conv_blocks)

    # Probe the conv stack with a single all-zero image to learn the flattened
    # size. The probe runs in eval mode so the BatchNorm running statistics are
    # not updated by this artificial input; training mode is restored afterwards.
    height, width = image_size
    self.features.eval()
    with torch.no_grad():
      probe = self.features(torch.zeros(1, in_channels, height, width))
    self.features.train()
    flat_size = probe.numel()  # batch size is 1, so this is the per-sample size

    dense_blocks = [nn.Flatten()]
    width_in = flat_size
    for neuron, dropout in zip(neurons_per_layer, dropout_per_layer):
      dense_blocks.append(nn.Linear(width_in, neuron))
      dense_blocks.append(nn.BatchNorm1d(neuron))
      dense_blocks.append(activation_cls())
      dense_blocks.append(nn.Dropout(dropout))
      width_in = neuron

    # Output layer: one logit per class (10 classes).
    dense_blocks.append(nn.Linear(width_in, 10))

    self.classifier = nn.Sequential(*dense_blocks)

  def forward(self, x):
    """Run the conv stack, then the classifier; returns the raw logits."""
    x = self.features(x)
    x = self.classifier(x)
    return x

In [35]:
def _evaluate_accuracy(model, loader):
  """Return the fraction of samples in `loader` that `model` classifies correctly.

  The model is switched to eval mode and the data is processed batch by batch
  under `torch.no_grad()`, so memory use is bounded by the batch size rather
  than by the size of the whole dataset. Returns 0.0 for an empty loader.
  """
  model.eval()
  correct = 0
  total = 0
  with torch.no_grad():
    for batch_features, batch_labels in loader:
      batch_features = batch_features.to(device)
      batch_labels = batch_labels.to(device)
      predicted = model(batch_features).argmax(dim=1)
      correct += (predicted == batch_labels).sum().item()
      total += batch_labels.size(0)
  return correct / total if total else 0.0


def objective(trial):
  """Optuna objective: build, train and score one `myCNN` configuration.

  Samples the training setup (epochs, learning rate, batch size, optimizer)
  and the architecture (conv/fc depth, filters, kernel sizes, widths, dropout)
  from `trial`, trains on `traindataset`, and evaluates on `testdataset`.
  The held-out accuracy is reported to the trial after every epoch so that a
  pruner attached to the study can stop unpromising trials.

  Args:
    trial: the `optuna.trial.Trial` used to sample hyperparameters.

  Returns:
    Accuracy on `testdataset` after the last epoch.

  Raises:
    optuna.TrialPruned: if the study's pruner decides to stop this trial.
  """
  num_epochs = trial.suggest_int('num_epochs', 1, 4)
  learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
  batch_size = trial.suggest_categorical('batch_size', [64, 128, 256, 512])
  optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD'])
  num_conv_layers = trial.suggest_categorical('conv_layers', [1,2,3])
  activation = trial.suggest_categorical('activation', ['ReLU', 'Tanh', 'ELU'])
  num_fc_layers = trial.suggest_categorical('num_fc_layers', [2, 3, 4, 5, 6])

  # Per-layer conv hyperparameters; parameter names are indexed by layer.
  filter_per_layer = []
  filter_size = []
  for i in range(num_conv_layers):
    filter_per_layer.append(trial.suggest_int(f"filter_in_conv_layer_{i}", 16, 128))
    filter_size.append(trial.suggest_int(f'filter_size_in_conv_layer_{i}', 2, 5))

  # Per-layer fully connected hyperparameters.
  neurons_per_layer = []
  dropout_per_layer = []
  for i in range(num_fc_layers):
    neurons_per_layer.append(trial.suggest_categorical(f'neurons_in_fc_layer_{i}', [16, 32, 64, 128, 256]))
    dropout_per_layer.append(trial.suggest_categorical(f'dropout_in_fc_layer_{i}', [0.1, 0.2, 0.3, 0.4]))

  model = myCNN(input_features=1, filter_per_layer=filter_per_layer, filter_size=filter_size, activation = activation, neurons_per_layer=neurons_per_layer, dropout_per_layer=dropout_per_layer)
  model.to(device)

  # Pinned host memory only helps host-to-GPU copies, so enable it only on CUDA.
  use_pinned_memory = device.type == 'cuda'
  train_loader = DataLoader(traindataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=use_pinned_memory)
  # Evaluation iterates the held-out split (not the training split) in batches.
  test_loader = DataLoader(testdataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=use_pinned_memory)

  loss_function = nn.CrossEntropyLoss()
  optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=learning_rate)

  score = 0.0
  for epoch in range(num_epochs):
    model.train()  # _evaluate_accuracy leaves the model in eval mode

    for batch_features, batch_labels in train_loader:
      batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

      optimizer.zero_grad()
      loss = loss_function(model(batch_features), batch_labels)
      loss.backward()
      optimizer.step()

    # Report the intermediate score so the study's pruner has something to act on.
    score = _evaluate_accuracy(model, test_loader)
    trial.report(score, epoch)
    if trial.should_prune():
      raise optuna.TrialPruned()

  return score


In [38]:
# Maximise the value returned by `objective`. The median pruner can only stop
# trials that report intermediate values (trial.report / trial.should_prune).
pruner = optuna.pruners.MedianPruner()
study = optuna.create_study(
    pruner=pruner,
    direction='maximize',
)
study.optimize(objective, n_trials=2)

[I 2026-01-05 17:43:31,412] A new study created in memory with name: no-name-31c62a64-af71-4239-a380-d1ffa0aeb2fe
[I 2026-01-05 17:44:54,316] Trial 0 finished with value: 0.8702 and parameters: {'num_epochs': 4, 'learning_rate': 0.0013033522556026179, 'batch_size': 256, 'optimizer': 'RMSprop', 'conv_layers': 1, 'activation': 'Tanh', 'num_fc_layers': 5, 'filter_in_conv_layer_0': 87, 'filter_size_in_conv_layer_0': 4, 'neurons_in_fc_layer_0': 64, 'dropout_in_fc_layer_0': 0.1, 'neurons_in_fc_layer_1': 64, 'dropout_in_fc_layer_1': 0.4, 'neurons_in_fc_layer_2': 128, 'dropout_in_fc_layer_2': 0.3, 'neurons_in_fc_layer_3': 32, 'dropout_in_fc_layer_3': 0.4, 'neurons_in_fc_layer_4': 32, 'dropout_in_fc_layer_4': 0.3}. Best is trial 0 with value: 0.8702.
[I 2026-01-05 17:45:13,573] Trial 1 finished with value: 0.7990666666666667 and parameters: {'num_epochs': 1, 'learning_rate': 7.648678986625903e-05, 'batch_size': 64, 'optimizer': 'RMSprop', 'conv_layers': 2, 'activation': 'ELU', 'num_fc_layers': 

In [40]:
# Hyperparameters of the best trial in the study (displayed as the cell output).
study.best_params

{'num_epochs': 4,
 'learning_rate': 0.0013033522556026179,
 'batch_size': 256,
 'optimizer': 'RMSprop',
 'conv_layers': 1,
 'activation': 'Tanh',
 'num_fc_layers': 5,
 'filter_in_conv_layer_0': 87,
 'filter_size_in_conv_layer_0': 4,
 'neurons_in_fc_layer_0': 64,
 'dropout_in_fc_layer_0': 0.1,
 'neurons_in_fc_layer_1': 64,
 'dropout_in_fc_layer_1': 0.4,
 'neurons_in_fc_layer_2': 128,
 'dropout_in_fc_layer_2': 0.3,
 'neurons_in_fc_layer_3': 32,
 'dropout_in_fc_layer_3': 0.4,
 'neurons_in_fc_layer_4': 32,
 'dropout_in_fc_layer_4': 0.3}

In [41]:
# Objective value (accuracy returned by `objective`) of the best trial.
study.best_value

0.8702