In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torchinfo import summary

In [2]:
torch.manual_seed(42)

<torch._C.Generator at 0x13156bf90>

In [3]:
df = pd.read_csv('../2. Dataset/fmnist_small.csv')


In [4]:
x = df.iloc[:, 1:].values/255.0
y = df.iloc[:,0].values

In [5]:
xtrain , xtest , ytrain , ytest = train_test_split( x , y , test_size=0.2 , random_state=20)

In [6]:
from torchvision import transforms

# Define data augmentations for the training dataset
# Most torchvision transforms are built for PIL Images, not NumPy arrays or tensors

train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomRotation(10),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomAffine(0, translate=(0.1, 0.1)),
    transforms.ToTensor()
])

test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor()
])



In [7]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):

  def __init__(self, features, labels,transform = None):

     self.features = torch.tensor(features, dtype=torch.float32).reshape(-1, 1, 28, 28)
     self.labels = torch.tensor(labels, dtype=torch.long)
     self.transform = transform

  def __len__(self):

    return len(self.features)

  def __getitem__(self, index):

    feature, label = self.features[index], self.labels[index]
    
    if self.transform:

      # feature = self.transform(feature.squeeze(0).numpy())  
      # Apply transform (like we are send gray scale image so ) , For numpy : (for gray (h,w) , for color : (h,w,c))
      # toTensor (auto add 1 dim for channel)

      feature = self.transform(feature) # if we used tensor. For tensor (toPIL : (c,h,w)) 

      
    return feature, label


In [8]:
train_dataset = CustomDataset(xtrain,ytrain)
test_dataset = CustomDataset(xtest,ytest)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True , pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True , pin_memory=True)
# helps to faster copy to gpu

In [9]:

class DynamicCNN(nn.Module):

    def __init__(self, in_channels,num_conv_layers, num_filters, kernel_size, num_fc_layers, fc_layer_size, dropout_rate):
        
        super().__init__()

        layers = []

        # Convolutional layers
        
        for i in range(num_conv_layers):

            layers.append(nn.Conv2d(in_channels, num_filters, kernel_size=kernel_size, padding='same'))
            layers.append(nn.BatchNorm2d(num_filters))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            in_channels = num_filters  # Update input channels for the next layer

        self.features = nn.Sequential(*layers)

        # Fully connected layers
        
        fc_layers = [nn.Flatten()]

        input_size = num_filters * (28 // (2 ** num_conv_layers)) ** 2

        for i in range(num_fc_layers):

            fc_layers.append(nn.Linear(input_size, fc_layer_size))
            fc_layers.append(nn.ReLU())
            fc_layers.append(nn.Dropout(dropout_rate))
            input_size = fc_layer_size

        fc_layers.append(nn.Linear(input_size, 10))  # Final layer for 10 classes

        self.classifier = nn.Sequential(*fc_layers)

    def forward(self, x):

        x = self.features(x)
        x = self.classifier(x)
        
        return x


In [10]:
device = 'cpu'
if hasattr(torch,'mps') and torch.backends.mps.is_available():
    device = 'mps'
    print("MPS is available")

MPS is available


In [11]:

def objective(trial):

    # Hyperparameters to tune
    num_conv_layers = trial.suggest_int('num_conv_layers', 1, 3)
    num_filters = trial.suggest_categorical('num_filters', [16, 32, 64, 128])
    kernel_size = trial.suggest_categorical('kernel_size', [3, 5])
    num_fc_layers = trial.suggest_int('num_fc_layers', 1, 3)
    fc_layer_size = trial.suggest_categorical('fc_layer_size', [64, 128, 256])

    dropout_rate = trial.suggest_uniform('dropout_rate', 0.2, 0.5)
    # dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)
    
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-5, 1e-2)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
    # Alt way : learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)

    optimizer_name = trial.suggest_categorical('optimizer', ['SGD', 'Adam', 'RMSprop'])
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    num_epochs = trial.suggest_int('num_epochs', 10, 30)

    # Model
    num_channels = 1  #for grayscale
    model = DynamicCNN(1,num_conv_layers, num_filters, kernel_size, num_fc_layers, fc_layer_size, dropout_rate).to(device)

    # Data
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Optimizer
    if optimizer_name == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer_name == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optimizer_name == 'RMSprop':
        optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Loss function
    criterion = nn.CrossEntropyLoss()

    # Training loop
    for epoch in range(num_epochs):
        
        model.train()

        for batch_features, batch_labels in train_loader:

            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
            
            optimizer.zero_grad()
            
            outputs = model(batch_features)
            
            loss = criterion(outputs, batch_labels)
            
            loss.backward()
            
            optimizer.step()

    # Validation loop
    model.eval()  # how the model layers behave like disable dropout and uses running mean and variance.

    correct = 0
    total = 0
    
    with torch.no_grad():

        for batch_features, batch_labels in test_loader:
        
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
        
            outputs = model(batch_features)
            _, predicted = torch.max(outputs, 1)
        
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()

    accuracy = correct / total
    return accuracy

In [16]:
import optuna

pruner = optuna.pruners.MedianPruner() 
# A pruner in Optuna is a smart early-stopping strategy.
# It stops training of unpromising trials to save time.

study = optuna.create_study(direction='maximize', pruner=pruner)
study.optimize(objective, n_trials=5)  # Run 50 trials

[I 2025-12-05 18:44:36,127] A new study created in memory with name: no-name-93de6f42-2afb-4b3a-a4b9-f44fa7624c2e
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.2, 0.5)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-5, 1e-2)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-4, 1e-2)
[I 2025-12-05 18:45:05,779] Trial 0 finished with value: 0.8633333333333333 and parameters: {'num_conv_layers': 3, 'num_filters': 64, 'kernel_size': 5, 'num_fc_layers': 3, 'fc_layer_size': 64, 'dropout_rate': 0.3254058123743253, 'weight_decay': 0.0002353487227781484, 'learning_rate': 0.00042245235879104047, 'optimizer': 'RMSprop', 'batch_size': 32, 'num_epochs': 18}. Best is trial 0 with value: 0.8633333333333333.
[I 2025-12-05 18:46:14,488] Trial 1 finished with value: 0.8875 and parameters: {'num_conv_layers': 2, 'num_filters': 128, 'kernel_size': 5, 'num_fc_layers': 2, 'fc_layer_size': 256, 'dropout_rate': 0.28197850749933656, 'weight_decay': 2.9594569378748e-05, 'lea

In [17]:
print("Best hyperparameters:", study.best_params)
print("Best accuracy:", study.best_value)

Best hyperparameters: {'num_conv_layers': 2, 'num_filters': 128, 'kernel_size': 5, 'num_fc_layers': 2, 'fc_layer_size': 256, 'dropout_rate': 0.28197850749933656, 'weight_decay': 2.9594569378748e-05, 'learning_rate': 0.0009636052727236369, 'optimizer': 'Adam', 'batch_size': 64, 'num_epochs': 20}
Best accuracy: 0.8875
