In [1]:
import torch
import torch.nn as nn

import tensorflow as tf
from tensorflow.keras.datasets import mnist

### Dataset and Preprocessing

In [2]:
# Load MNIST through the Keras helper (the archive is downloaded on first use);
# it returns a (train images, train labels) pair and a (test images, test labels) pair.
(X, y), (Xtest, ytest) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
# Sanity check: array shapes of the train and test splits as loaded.
X.shape, y.shape, Xtest.shape, ytest.shape

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [5]:
from sklearn.preprocessing import StandardScaler

# Flatten every 28x28 image into a 784-long row and standardise the features.
# The scaler is fitted on the training split only; the test split reuses the
# training statistics so no test information leaks into preprocessing.
# NOTE(review): X and Xtest are overwritten, so the raw images are no longer
# available after this cell runs.
scaler = StandardScaler()
X = scaler.fit_transform(X.reshape(-1, 28*28))
Xtest = scaler.transform(Xtest.reshape(-1, 28*28))

In [6]:
# Sanity check: both splits are now 2-D (samples, 784).
X.shape, Xtest.shape

((60000, 784), (10000, 784))

In [7]:
from torch.utils.data import Dataset, DataLoader
# Seed PyTorch's global random number generator so torch-driven randomness is repeatable.
torch.manual_seed(42)

<torch._C.Generator at 0x7ba175b2d2f0>

In [8]:
class customDataset(Dataset):
  """In-memory dataset pairing feature rows with integer class labels.

  Args:
    X: array-like of features; copied into a float32 tensor.
    Y: array-like of labels; copied into an int64 (long) tensor.

  Raises:
    ValueError: if X and Y do not contain the same number of samples.
  """

  def __init__(self, X, Y):
    self.X = torch.tensor(X, dtype=torch.float32)
    self.Y = torch.tensor(Y, dtype=torch.long)

    # __len__ is driven by X alone, so a shorter Y would otherwise only surface
    # as an IndexError in the middle of an epoch. Fail early instead.
    if len(self.X) != len(self.Y):
      raise ValueError(
        f"X and Y must have the same number of samples, got {len(self.X)} and {len(self.Y)}"
      )

  def __len__(self):
    """Return the number of samples."""
    return len(self.X)

  def __getitem__(self, i):
    """Return the (features, label) pair stored at index `i`."""
    return self.X[i], self.Y[i]

In [26]:
# Wrap the scaled arrays and their labels as torch datasets for the DataLoaders.
train_dataset = customDataset(X, y)
test_dataset = customDataset(Xtest, ytest)

In [10]:
# Sanity check: number of training samples and the feature shape of one sample.
len(train_dataset), train_dataset[0][0].shape

(60000, torch.Size([784]))

In [11]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.1-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.1-py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.5/242.5 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.1 colorlog-6.9.0 optuna-4.3.0


In [29]:
class MyNN(nn.Module):
  """Configurable fully connected classifier.

  The network is `num_hidden_layers` repetitions of
  Linear -> BatchNorm1d -> ReLU -> Dropout, each `neurons_per_layers` wide,
  followed by one Linear layer that maps to `output_dim` raw scores
  (no softmax is applied).
  """

  def __init__(self, input_dim, output_dim, num_hidden_layers, neurons_per_layers, dropout_rate):
    super().__init__()

    # Feature width entering each hidden block: the input width first,
    # then the hidden width for every subsequent block.
    widths = [input_dim] + [neurons_per_layers] * num_hidden_layers

    stack = []
    for fan_in, fan_out in zip(widths, widths[1:]):
      stack.extend([
        nn.Linear(fan_in, fan_out),
        nn.BatchNorm1d(fan_out),
        nn.ReLU(),
        nn.Dropout(dropout_rate),
      ])

    # Output head reads whatever width the last block produced
    # (the raw input width when there are no hidden blocks).
    stack.append(nn.Linear(widths[-1], output_dim))

    self.network = nn.Sequential(*stack)

  def forward(self, x):
    """Run `x` through the layer stack and return the raw class scores."""
    scores = self.network(x)
    return scores

In [30]:
import optuna
#objective function

def objective(trial):
  """Optuna objective: train one MyNN configuration and return its accuracy.

  Hyperparameters sampled from `trial`: number of hidden layers, neurons per
  layer, dropout rate, batch size, number of epochs, learning rate, optimizer
  and weight decay. The model is trained on `train_dataset` and scored on
  `test_dataset`.

  Args:
    trial: the Optuna trial object that supplies the hyperparameter suggestions.

  Returns:
    Classification accuracy on `test_dataset`, as a percentage in [0, 100].

  NOTE(review): the value Optuna maximises is measured on the test split, so
  the best trial's score is an optimistic estimate of generalisation. Consider
  tuning against a validation split held out from the training data.
  """
  # next hyperparameter values from the search space
  num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 5)
  neurons_per_layers = trial.suggest_int("neurons_per_layers", 8, 128, step=8)
  dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
  batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])

  # Pinned host memory only speeds up host-to-GPU copies; requesting it on a
  # CPU-only machine has no benefit and makes the DataLoader emit a warning.
  pin_memory = str(device).startswith("cuda")
  train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=pin_memory)
  test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=pin_memory)

  # model init
  input_dim = 784
  output_dim = 10

  model = MyNN(input_dim, output_dim, num_hidden_layers, neurons_per_layers, dropout_rate)
  model.to(device)

  # params init
  epoch = trial.suggest_int("epoch", 10, 50, step=10)
  learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)

  # optimizer selection
  criterion = nn.CrossEntropyLoss()
  optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"])
  weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)

  optimizer_classes = {
    "Adam": torch.optim.Adam,
    "SGD": torch.optim.SGD,
    "RMSprop": torch.optim.RMSprop,
  }
  # weight_decay adds an L2 regularization term to every optimizer's update
  optimizer = optimizer_classes[optimizer_name](
    model.parameters(), lr=learning_rate, weight_decay=weight_decay
  )

  # training loop
  model.train()  # explicit: dropout and batch-norm must be in training mode here
  for _ in range(epoch):
    for batch_x, batch_y in train_loader:
      batch_x = batch_x.to(device)
      batch_y = batch_y.to(device)
      y_pred = model(batch_x)
      loss = criterion(y_pred, batch_y)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

  # evaluation
  model.eval()  # disables dropout and uses batch-norm running statistics
  total = 0
  correct = 0

  with torch.no_grad():
    for batch_x, batch_y in test_loader:
      batch_x = batch_x.to(device)
      batch_y = batch_y.to(device)
      # predicted class = index of the largest score in each row
      predicted = model(batch_x).argmax(dim=1)
      total += batch_y.shape[0]
      correct += (predicted == batch_y).sum().item()

  return 100 * correct / total

In [31]:
# TPE is already Optuna's default sampler; passing it explicitly only adds a fixed
# seed so the sequence of suggested hyperparameters is repeatable across runs.
study = optuna.create_study(
  direction='maximize',
  sampler=optuna.samplers.TPESampler(seed=42),
)

[I 2025-06-05 08:17:18,048] A new study created in memory with name: no-name-f2053433-e05a-4065-bcf4-186d6bc17088


In [32]:
# Run 10 trials; every trial calls objective(), which trains and scores a fresh model.
study.optimize(objective, n_trials=10)

[I 2025-06-05 08:19:00,425] Trial 0 finished with value: 50.5 and parameters: {'num_hidden_layers': 4, 'neurons_per_layers': 48, 'dropout_rate': 0.2, 'batch_size': 128, 'epoch': 40, 'learning_rate': 3.442703747518311e-05, 'optimizer': 'SGD', 'weight_decay': 5.1951821676832875e-05}. Best is trial 0 with value: 50.5.
[I 2025-06-05 08:24:02,654] Trial 1 finished with value: 95.03 and parameters: {'num_hidden_layers': 3, 'neurons_per_layers': 96, 'dropout_rate': 0.1, 'batch_size': 32, 'epoch': 50, 'learning_rate': 0.00022687726882254536, 'optimizer': 'SGD', 'weight_decay': 3.1151211393492894e-05}. Best is trial 1 with value: 95.03.
[I 2025-06-05 08:26:33,240] Trial 2 finished with value: 97.93 and parameters: {'num_hidden_layers': 5, 'neurons_per_layers': 88, 'dropout_rate': 0.1, 'batch_size': 64, 'epoch': 30, 'learning_rate': 0.036711139882733695, 'optimizer': 'SGD', 'weight_decay': 0.0005269403682858592}. Best is trial 2 with value: 97.93.
[I 2025-06-05 08:30:04,704] Trial 3 finished wit

In [33]:
# Best objective value found by the study and the hyperparameters that produced it.
study.best_value, study.best_params

(97.93,
 {'num_hidden_layers': 5,
  'neurons_per_layers': 88,
  'dropout_rate': 0.1,
  'batch_size': 64,
  'epoch': 30,
  'learning_rate': 0.036711139882733695,
  'optimizer': 'SGD',
  'weight_decay': 0.0005269403682858592})

### Next step: track these runs with MLflow to compare trials and choose hyperparameter values with more confidence
[https://mlflow.org/docs/latest/getting-started/index.html](https://mlflow.org/docs/latest/getting-started/index.html)