In [25]:
!pip install optuna
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split



In [26]:
device = torch.device("cuda"if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [27]:
data = pd.read_csv("fmnist_small.csv")
data

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,9,0,0,0,0,0,0,0,0,0,...,0,7,0,50,205,196,213,165,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,142,142,142,21,0,3,0,0,0,0
3,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,8,0,0,0,0,0,0,0,0,0,...,213,203,174,151,188,10,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5995,1,0,0,0,0,0,0,0,0,0,...,69,12,0,0,0,0,0,0,0,0
5996,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5997,8,0,0,0,0,0,0,0,0,0,...,39,47,2,0,0,29,0,0,0,0
5998,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
x = data.drop(['label'], axis= 1).to_numpy()
y = data['label'].to_numpy()

In [29]:
x = x/255.0

In [30]:
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size = 0.2, random_state=42)

In [31]:
class CustomDataset(Dataset):
  def __init__(self, features, labels):
    self.features = torch.tensor(features, dtype= torch.float32)
    self.labels = torch.tensor(labels, dtype = torch.long)

  def __len__(self):
    return len(self.features)

  def __getitem__(self, index):
    return self.features[index], self.labels[index]

In [32]:
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)

In [33]:
train_dataloader = DataLoader(train_dataset, batch_size = 64, shuffle = True, pin_memory=True)
test_dataloader = DataLoader(test_dataset, batch_size = 64, shuffle = False, pin_memory=True)

In [34]:
class mynn(nn.Module):
  def __init__(self, inp_dim, out_dim, num_hidden_layers, nureon_per_layers):
    super().__init__()

    layers = []

    for i in range(num_hidden_layers):
      layers.append(nn.Linear(inp_dim, nureon_per_layers))
      layers.append(nn.BatchNorm1d(nureon_per_layers))
      layers.append(nn.ReLU())
      layers.append(nn.Dropout(0.3))
      inp_dim = nureon_per_layers
    layers.append(nn.Linear(nureon_per_layers, out_dim))

    self.model = nn.Sequential(*layers)

  def forward(self, x):
    return self.model(x)



In [35]:
def Objective(trial):
  num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 100)
  nureon_per_layers = trial.suggest_int("nureon_per_layers", 1, 1000)

  inp_dim = X_train.shape[1]
  out_dim = 10

  model = mynn(inp_dim, out_dim, num_hidden_layers, nureon_per_layers)
  model = model.to(device)

  learning_rate = 0.01
  epochs = 50

  optimizer = optim.SGD(model.parameters(), lr = learning_rate)
  criteron = nn.CrossEntropyLoss()

  for epoch in range(epochs):
    for batch_features, batch_labels in train_dataloader:
      batch_features = batch_features.to(device)
      batch_labels = batch_labels.to(device)
      outputs = model(batch_features)
      loss = criteron(outputs, batch_labels)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      # evaluation
  model.eval()
  # evaluation on test data
  total = 0
  correct = 0

  with torch.no_grad():

    for batch_features, batch_labels in test_dataloader:

      # move data to gpu
      batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

      outputs = model(batch_features)

      _, predicted = torch.max(outputs, 1)

      total = total + batch_labels.shape[0]

      correct = correct + (predicted == batch_labels).sum().item()

    accuracy = correct/total

  return accuracy

In [36]:
study = optuna.create_study(direction = "maximize")
study.optimize(Objective, n_trials = 10)

[I 2025-05-14 12:36:15,186] A new study created in memory with name: no-name-b32ecd69-7f61-47c3-9fb8-fd21b63e4ce8
[I 2025-05-14 12:38:21,945] Trial 0 finished with value: 0.10666666666666667 and parameters: {'num_hidden_layers': 78, 'nureon_per_layers': 8}. Best is trial 0 with value: 0.10666666666666667.
[I 2025-05-14 12:40:17,503] Trial 1 finished with value: 0.10666666666666667 and parameters: {'num_hidden_layers': 75, 'nureon_per_layers': 247}. Best is trial 0 with value: 0.10666666666666667.
[I 2025-05-14 12:42:28,168] Trial 2 finished with value: 0.10666666666666667 and parameters: {'num_hidden_layers': 81, 'nureon_per_layers': 598}. Best is trial 0 with value: 0.10666666666666667.
[I 2025-05-14 12:43:04,797] Trial 3 finished with value: 0.5075 and parameters: {'num_hidden_layers': 20, 'nureon_per_layers': 561}. Best is trial 3 with value: 0.5075.
[I 2025-05-14 12:44:35,585] Trial 4 finished with value: 0.09416666666666666 and parameters: {'num_hidden_layers': 54, 'nureon_per_lay

In [37]:
study.best_trial.params

{'num_hidden_layers': 13, 'nureon_per_layers': 807}

In [38]:
study.best_trial.value

0.7216666666666667