In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

Network layout: 784 -> 128 -> 64 -> 10 (784 input pixels, two hidden layers of 128 and 64 units, and 10 output classes such as shoe, shirt, jacket, etc.)

In [5]:
# Fix the seed of PyTorch's global random number generator so that the
# stochastic parts of this notebook can be repeated from a fresh kernel.
torch.manual_seed(seed=42)

<torch._C.Generator at 0x1cc89522770>

In [10]:
# Read the dataset from the CSV file in the working directory and preview
# its first five rows.
df = pd.read_csv(filepath_or_buffer='fmnist_small.csv')
df.head(n=5)

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,9,0,0,0,0,0,0,0,0,0,...,0,7,0,50,205,196,213,165,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,142,142,142,21,0,3,0,0,0,0
3,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,8,0,0,0,0,0,0,0,0,0,...,213,203,174,151,188,10,0,0,0,0


In [11]:
# Column 0 is used as the target; every remaining column is used as a feature.
# Both are pulled out as NumPy arrays.
y = df.iloc[:, 0].to_numpy()
x = df.iloc[:, 1:].to_numpy()

In [12]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# Divide by 255.0 to rescale the pixel intensities (assumed to lie in 0-255)
# into the [0, 1] range.
# NOTE: this rebinds x_train / x_test, so running the cell a second time
# divides the already-scaled values again; re-run the split cell first.
x_train, x_test = x_train / 255.0, x_test / 255.0

In [14]:
class CustomDataset(Dataset):
    """In-memory dataset pairing a feature tensor with a label tensor.

    Args:
        features: Array-like of per-sample features; converted to a
            ``torch.float32`` tensor.
        labels: Array-like of per-sample labels; converted to a
            ``torch.long`` tensor.

    Raises:
        ValueError: If ``features`` and ``labels`` do not contain the same
            number of samples along their first dimension.
    """

    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

        # __len__ is driven by the features alone, so a length mismatch would
        # otherwise go unnoticed until an index error (or silently ignored
        # labels) much later. Comparing shape[:1] is safe for any tensor rank.
        if self.features.shape[:1] != self.labels.shape[:1]:
            raise ValueError(
                "features and labels must have the same number of samples, "
                f"got {tuple(self.features.shape[:1])} and {tuple(self.labels.shape[:1])}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.features[idx], self.labels[idx]

In [15]:
# Wrap the training arrays in a CustomDataset.
train_dataset = CustomDataset(features=x_train, labels=y_train)

In [None]:
# A notebook cell only echoes its final expression, so the dataset size is
# printed explicitly; the first (features, label) pair is then displayed.
print(len(train_dataset))
train_dataset[0]


(tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0

In [16]:
# Wrap the held-out arrays in a CustomDataset.
test_dataset = CustomDataset(features=x_test, labels=y_test)

In [17]:
# Mini-batch iterators of 32 samples each: the training loader shuffles,
# the test loader keeps the dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [18]:
class MyNN(nn.Module):
    """Fully connected network with a configurable stack of hidden layers.

    Every hidden block is ``Linear -> ReLU -> Dropout``; a final ``Linear``
    layer maps to ``output_dim`` raw scores (no activation is applied to the
    output).

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output scores per sample.
        num_hidden_layers: Number of hidden blocks to stack. ``0`` produces a
            single linear layer from ``input_dim`` to ``output_dim``.
        hidden_layer_size: Width of every hidden layer.
        dropout_rate: Drop probability passed to each ``nn.Dropout``.
    """

    def __init__(self, input_dim, output_dim, num_hidden_layers, hidden_layer_size, dropout_rate=0.3):
        super().__init__()
        layers = []
        in_features = input_dim
        for _ in range(num_hidden_layers):
            layers.append(nn.Linear(in_features, hidden_layer_size))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))
            in_features = hidden_layer_size

        # Feed the output layer from the running width rather than from
        # hidden_layer_size, so the network is still well-formed when there
        # are no hidden layers at all.
        layers.append(nn.Linear(in_features, output_dim))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw output scores."""
        return self.model(x)

In [19]:
def objective(trial):
    """Optuna objective: train a ``MyNN`` with trial-suggested sizes and score it.

    Args:
        trial: Object providing ``suggest_int``; used to sample
            ``num_hidden_layers`` (1 to 6) and ``hidden_layer_size``
            (8 to 512 in steps of 8).

    Returns:
        float: Fraction of the samples yielded by ``test_loader`` that the
        trained model classifies correctly.

    Note:
        Reads the notebook-level ``x_train``, ``train_loader`` and
        ``test_loader``.
        NOTE(review): the score is measured on ``test_loader``, i.e. the same
        data is used to pick hyperparameters and to report accuracy; consider
        a separate validation split.
    """
    num_hidden_layers = trial.suggest_int('num_hidden_layers', 1, 6)
    hidden_layer_size = trial.suggest_int('hidden_layer_size', 8, 512, step=8)

    input_size = x_train.shape[1]
    output_size = 10

    # Training settings are defined before the optimizer is built so that
    # changing learning_rate here actually takes effect.
    learning_rate = 0.01
    epochs = 50

    model = MyNN(input_size, output_size, num_hidden_layers, hidden_layer_size)

    # A fresh criterion and optimizer are created for every trial.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    model.train()
    for _ in range(epochs):
        for batch_features, batch_labels in train_loader:
            outputs = model(batch_features)
            loss = criterion(outputs, batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_features, batch_labels in test_loader:
            # Index of the largest score per row is the predicted class.
            predicted = model(batch_features).argmax(dim=1)
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()

    accuracy = correct / total
    return accuracy

In [20]:
import optuna

# Pass an explicitly seeded sampler so the sequence of suggested
# hyperparameters is repeatable between runs of the notebook.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=10)

[I 2025-07-12 17:02:29,281] A new study created in memory with name: no-name-b8edba7d-36e5-497f-8941-15c38096149c
[I 2025-07-12 17:03:04,538] Trial 0 finished with value: 0.8191666666666667 and parameters: {'num_hidden_layers': 1, 'hidden_layer_size': 440}. Best is trial 0 with value: 0.8191666666666667.
[I 2025-07-12 17:03:04,538] Trial 0 finished with value: 0.8191666666666667 and parameters: {'num_hidden_layers': 1, 'hidden_layer_size': 440}. Best is trial 0 with value: 0.8191666666666667.
[I 2025-07-12 17:03:29,472] Trial 1 finished with value: 0.6875 and parameters: {'num_hidden_layers': 3, 'hidden_layer_size': 136}. Best is trial 0 with value: 0.8191666666666667.
[I 2025-07-12 17:03:29,472] Trial 1 finished with value: 0.6875 and parameters: {'num_hidden_layers': 3, 'hidden_layer_size': 136}. Best is trial 0 with value: 0.8191666666666667.
[I 2025-07-12 17:04:09,272] Trial 2 finished with value: 0.25166666666666665 and parameters: {'num_hidden_layers': 5, 'hidden_layer_size': 160

In [21]:
# Show the hyperparameters and the objective value of the best trial.
study.best_trial.params, study.best_trial.value

({'num_hidden_layers': 1, 'hidden_layer_size': 440}, 0.8191666666666667)