In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim 
import matplotlib.pyplot as plt

In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x111b6754e30>

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [4]:
df = pd.read_csv("fashion-mnist_train.csv")
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
X = df.iloc[:,1:].values
y = df.iloc[:,0].values

In [6]:
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [7]:
x_train = x_train/255.0
x_test = x_test/255.0

In [8]:
class CustomDataset(Dataset):

    def __init__(self, features, labels):

        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, index):
        return self.features[index], self.labels[index]
        

In [9]:
train_dataset = CustomDataset(x_train,y_train)

In [10]:
test_dataset = CustomDataset(x_test,y_test)

In [11]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, pin_memory=True)

In [16]:
class MyNN(nn.Module):

    def __init__(self, input_dim, output_dim, num_hidden_layers, neurons_per_layer):
        super().__init__()

        layers = []

        for i in range(num_hidden_layers):

            layers.append(nn.Linear(input_dim, neurons_per_layer)) 
            layers.append(nn.BatchNorm1d(neurons_per_layer))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(0.3))
            input_dim = neurons_per_layer

        layers.append(nn.Linear(neurons_per_layer,output_dim))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

In [None]:
# Objective function 

def objective(trial):

    # next hyperparameter values from the search space 
    num_hidden_layers = trial.suggest_int("num_hidden_layers",1,5)
    neurons_per_layer = trial.suggest_int("neurons_per_layer",8,128,step=8)
    epochs = trial.suggest_int('epochs',10,50,steps=10)
    learning_rate = trial.suggest_float("learning_rate",1e-5,1e-1,log=True)
    dropout_rate = trial.suggest_float("dropout_rate",0.1,0.5,step=0.1)
    batch_size = trial.suggest_categorical("batch_size",[16,32,64,128])
    optimizer_name = trial.suggest_categorical("optimizer",['Adam','SGD','RMSprop'])
    weight_decay = trial.suggest_float('weight_decay',1e-5,1e-3,log=True)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

    # model init 
    input_dim = 784
    output_dim = 10

    model = MyNN(input_dim, output_dim, num_hidden_layers, neurons_per_layer)
    model.to(device)

    # params init 
    learning_rate = 0.01
    epochs = 50

    # optimizer selection
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),lr=learning_rate,weight_decay=weight_decay)

    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
    elif optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
    else:
        optimizer = optim.RMSprop(model.parameters(),lr=learning_rate,weight_decay=weight_decay)


    # training loop 
    for epoch in range(epochs):

        for batch_features, batch_labels in train_loader:

            # move data to gpu 
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

            # forward pass
            outputs = model(batch_features)

            # calculate loss
            loss = criterion(outputs, batch_labels)

            # back pass
            optimizer.zero_grad()
            loss.backward()

            # update grads
            optimizer.step()

            # total_epoch_loss = total_epoch_loss + loss.item()
        

    # evaluation 
    model.eval()
    total = 0
    correct = 0

    with torch.no_grad():

        for batch_features, batch_labels in test_loader:

            # move data to gpu 
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

            outputs = model(batch_features)

            _, predicted = torch.max(outputs,1)

            total = total+ batch_labels.shape[0]

            correct = correct + (predicted == batch_labels).sum().item()

    accuracy = correct/total


    return accuracy

In [16]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.4-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading sqlalchemy-2.0.43-cp311-cp311-win_amd64.whl.metadata (9.8 kB)
Collecting tqdm (from optuna)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting PyYAML (from optuna)
  Downloading PyYAML-6.0.2-cp311-cp311-win_amd64.whl.metadata (2.1 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)
Collecting greenlet>=1 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.2.4-cp311-cp311-win_amd64.whl.metadata (4.2 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
Downloading alembic-1.16.4-py3-none-any.whl (247 kB)
Downloading sqlalchemy-2.0.43-cp311-cp311-win_amd64.whl (2.1 MB)
  

In [21]:
import optuna

study = optuna.create_study(direction='maximize')

[I 2025-08-20 17:08:17,729] A new study created in memory with name: no-name-9c3cfecf-d2c1-4b57-8e73-39374fe40761


In [24]:
study.optimize(objective,n_trials=10)

[I 2025-08-20 17:17:17,215] Trial 1 finished with value: 0.8865833333333333 and parameters: {'num_hidden_layers': 5, 'neurons_per_layer': 104}. Best is trial 1 with value: 0.8865833333333333.
[I 2025-08-20 17:25:41,266] Trial 2 finished with value: 0.7636666666666667 and parameters: {'num_hidden_layers': 5, 'neurons_per_layer': 24}. Best is trial 1 with value: 0.8865833333333333.
[W 2025-08-20 17:26:48,204] Trial 3 failed with parameters: {'num_hidden_layers': 5, 'neurons_per_layer': 40} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\lokes\miniconda3\envs\toch\Lib\site-packages\optuna\study\_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\lokes\AppData\Local\Temp\ipykernel_21768\3362789753.py", line 34, in objective
    outputs = model(batch_features)
              ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\lokes\miniconda3\envs\toch\Lib\site-packages\torch\n

KeyboardInterrupt: 