In [11]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

batch_size = 64
num_classes = 10
learning_rate = 0.001

num_epochs = 20

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [12]:
all_transforms = transforms.Compose([transforms.Resize((32,32)),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.5],
                                                          std=[0.5])
                                     ])
train_dataset = torchvision.datasets.FashionMNIST(root = './data',
                                             train = True,
                                             transform = all_transforms,
                                             download = True)

test_dataset = torchvision.datasets.FashionMNIST(root = './data',
                                            train = False,
                                            transform = all_transforms,
                                            download=True)

train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)


test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)
                                           

In [13]:
class ConvNeuralNet(nn.Module):
    def __init__(self, num_classes):
        super(ConvNeuralNet, self).__init__()
        self.conv_layer1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
        self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        
        self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
        self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        
        self.fc1 = nn.Linear(1600, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)
    
    # Progresses data across layers    
    def forward(self, x):
        out = self.conv_layer1(x)
        out = self.conv_layer2(out)
        out = self.max_pool1(out)
        
        out = self.conv_layer3(out)
        out = self.conv_layer4(out)
        out = self.max_pool2(out)
                
        out = out.reshape(out.size(0), -1)
        
        out = self.fc1(out)
        out = self.relu1(out)
        out = self.fc2(out)
        return out

In [14]:
model = ConvNeuralNet(num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)  

total_step = len(train_loader)

In [15]:
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):  
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

Epoch [1/20], Loss: 0.7884
Epoch [2/20], Loss: 0.4250
Epoch [3/20], Loss: 0.4920
Epoch [4/20], Loss: 0.5076
Epoch [5/20], Loss: 0.3309
Epoch [6/20], Loss: 0.4650
Epoch [7/20], Loss: 0.5801
Epoch [8/20], Loss: 0.3866
Epoch [9/20], Loss: 0.2319
Epoch [10/20], Loss: 0.1671
Epoch [11/20], Loss: 0.4188
Epoch [12/20], Loss: 0.2109
Epoch [13/20], Loss: 0.3924
Epoch [14/20], Loss: 0.3434
Epoch [15/20], Loss: 0.1196
Epoch [16/20], Loss: 0.5149
Epoch [17/20], Loss: 0.1815
Epoch [18/20], Loss: 0.4492
Epoch [19/20], Loss: 0.2038
Epoch [20/20], Loss: 0.2250


In [16]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    
    print('Accuracy of the network on the {} train images: {} %'.format(50000, 100 * correct / total))

Accuracy of the network on the 50000 train images: 88.35 %


## Grid Search for Hyperparameter Tuning

Testing different combinations of:
- Optimizers: SGD, Adam
- Learning rates: 0.001, 0.01, 0.1
- Weight decay: 0, 0.005

Total configurations: 2 × 3 × 2 = 12

In [17]:
# Grid search hyperparameters
optimizers_to_test = ['sgd', 'adam']
learning_rates = [0.001, 0.01, 0.1]
weight_decays = [0, 0.005]

# Store results
results = []

print(f"Total configurations to test: {len(optimizers_to_test) * len(learning_rates) * len(weight_decays)}")

Total configurations to test: 12


In [18]:
import time

config_num = 0
for optimizer_name in optimizers_to_test:
    for lr in learning_rates:
        for wd in weight_decays:
            config_num += 1
            print(f"\n{'='*60}")
            print(f"Configuration {config_num}/12")
            print(f"Optimizer: {optimizer_name}, LR: {lr}, Weight Decay: {wd}")
            print(f"{'='*60}")
            
            # Create fresh model
            model = ConvNeuralNet(num_classes).to(device)
            
            # Create optimizer based on type
            if optimizer_name == 'sgd':
                optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=wd, momentum=0.9)
            else:  # adam
                optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
            
            criterion = nn.CrossEntropyLoss()
            
            # Training
            start_time = time.time()
            for epoch in range(num_epochs):
                model.train()
                for i, (images, labels) in enumerate(train_loader):
                    images = images.to(device)
                    labels = labels.to(device)
                    
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                
                if (epoch + 1) % 5 == 0:  # Print every 5 epochs
                    print(f'  Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
            
            training_time = time.time() - start_time
            
            # Evaluation on test set
            model.eval()
            with torch.no_grad():
                correct = 0
                total = 0
                for images, labels in test_loader:
                    images = images.to(device)
                    labels = labels.to(device)
                    outputs = model(images)
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                
                test_accuracy = 100 * correct / total
            
            print(f"  Test Accuracy: {test_accuracy:.2f}%")
            print(f"  Training Time: {training_time:.2f}s")
            
            # Store results
            results.append({
                'optimizer': optimizer_name,
                'learning_rate': lr,
                'weight_decay': wd,
                'test_accuracy': test_accuracy,
                'training_time': training_time
            })

print(f"\n{'='*60}")
print("Grid search completed!")
print(f"{'='*60}")


Configuration 1/12
Optimizer: sgd, LR: 0.001, Weight Decay: 0
  Epoch [5/20], Loss: 0.2848
  Epoch [10/20], Loss: 0.2485
  Epoch [15/20], Loss: 0.0997
  Epoch [20/20], Loss: 0.3688
  Test Accuracy: 88.64%
  Training Time: 91.94s

Configuration 2/12
Optimizer: sgd, LR: 0.001, Weight Decay: 0.005
  Epoch [5/20], Loss: 0.5126
  Epoch [10/20], Loss: 0.3665
  Epoch [15/20], Loss: 0.2685
  Epoch [20/20], Loss: 0.2177
  Test Accuracy: 88.28%
  Training Time: 92.59s

Configuration 3/12
Optimizer: sgd, LR: 0.01, Weight Decay: 0
  Epoch [5/20], Loss: 0.2046
  Epoch [10/20], Loss: 0.2014
  Epoch [15/20], Loss: 0.0366
  Epoch [20/20], Loss: 0.0306
  Test Accuracy: 90.26%
  Training Time: 91.93s

Configuration 4/12
Optimizer: sgd, LR: 0.01, Weight Decay: 0.005
  Epoch [5/20], Loss: 0.2373
  Epoch [10/20], Loss: 0.2847
  Epoch [15/20], Loss: 0.4507
  Epoch [20/20], Loss: 0.2508
  Test Accuracy: 88.44%
  Training Time: 92.14s

Configuration 5/12
Optimizer: sgd, LR: 0.1, Weight Decay: 0
  Epoch [5/20

In [19]:
# Display all results sorted by test accuracy
print("\n" + "="*80)
print("GRID SEARCH RESULTS - Sorted by Test Accuracy")
print("="*80)
print(f"{'Rank':<6} {'Optimizer':<10} {'LR':<10} {'Weight Decay':<13} {'Test Acc':<12} {'Time (s)':<10}")
print("-"*80)

# Sort results by test accuracy (descending)
sorted_results = sorted(results, key=lambda x: x['test_accuracy'], reverse=True)

for rank, result in enumerate(sorted_results, 1):
    print(f"{rank:<6} {result['optimizer']:<10} {result['learning_rate']:<10} "
          f"{result['weight_decay']:<13} {result['test_accuracy']:<12.2f} {result['training_time']:<10.2f}")

print("="*80)
print("\nBEST CONFIGURATION:")
best = sorted_results[0]
print(f"  Optimizer: {best['optimizer']}")
print(f"  Learning Rate: {best['learning_rate']}")
print(f"  Weight Decay: {best['weight_decay']}")
print(f"  Test Accuracy: {best['test_accuracy']:.2f}%")
print(f"  Training Time: {best['training_time']:.2f}s")
print("="*80)


GRID SEARCH RESULTS - Sorted by Test Accuracy
Rank   Optimizer  LR         Weight Decay  Test Acc     Time (s)  
--------------------------------------------------------------------------------
1      adam       0.001      0             90.31        93.79     
2      sgd        0.01       0             90.26        91.93     
3      adam       0.001      0.005         89.12        94.94     
4      sgd        0.001      0             88.64        91.94     
5      sgd        0.01       0.005         88.44        92.14     
6      sgd        0.001      0.005         88.28        92.59     
7      sgd        0.1        0             10.00        91.76     
8      sgd        0.1        0.005         10.00        91.40     
9      adam       0.01       0             10.00        94.36     
10     adam       0.01       0.005         10.00        94.47     
11     adam       0.1        0             10.00        94.65     
12     adam       0.1        0.005         10.00        94.86     



## Retrain Best Configuration with F1 Score

Retraining the best configuration (SGD, LR=0.01, Weight Decay=0) and saving the model.

In [22]:
# Best configuration from grid search
best_optimizer = 'adam'
best_lr = 0.001
best_wd = 0

print("Training with best configuration...")
print(f"Optimizer: {best_optimizer}, LR: {best_lr}, Weight Decay: {best_wd}\n")

# Create fresh model
best_model = ConvNeuralNet(num_classes).to(device)

# Create optimizer
optimizer = torch.optim.SGD(best_model.parameters(), lr=best_lr, weight_decay=best_wd, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Training
for epoch in range(num_epochs):
    best_model.train()
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = best_model(images)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if (epoch + 1) % 5 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Save the best model
torch.save(best_model.state_dict(), 'best_cnn_model.pth')
print(f"\nModel saved to 'best_cnn_model.pth'")

Training with best configuration...
Optimizer: adam, LR: 0.001, Weight Decay: 0

Epoch [5/20], Loss: 0.5587
Epoch [10/20], Loss: 0.2909
Epoch [15/20], Loss: 0.2835
Epoch [20/20], Loss: 0.0893

Model saved to 'best_cnn_model.pth'
