Q1(a): Deep Learning Models on MNIST/FashionMNIST
Step-by-Step Implementation Approach:
Setup and Imports: Import necessary libraries

Data Preparation: Load datasets with 70-10-20 split

Model Definition: Create ResNet-18 and ResNet-50 models

Training Pipeline: Implement training with validation

Hyperparameter Testing: Test all combinations

Results Collection: Store accuracy for comparison

In [18]:
# Run this cell first in Google Colab or Kaggle
!pip install torch torchvision thop
!pip install scikit-learn pandas numpy matplotlib
!pip install GPUtil psutil



In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import numpy as np
from torch.cuda.amp import autocast, GradScaler
import time
from sklearn.metrics import accuracy_score
import pandas as pd
from torchvision.models import resnet18, resnet50

In [20]:
# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

In [21]:
class ResNetMNIST(nn.Module):
    """Wrapper for ResNet to handle 1-channel MNIST images"""
    def __init__(self, model_type='resnet18', num_classes=10):
        super(ResNetMNIST, self).__init__()
        
        if model_type == 'resnet18':
            self.model = resnet18(pretrained=False)
            # Modify first conv layer for 1-channel input
            self.model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        elif model_type == 'resnet50':
            self.model = resnet50(pretrained=False)
            self.model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        
        # Modify final layer for 10 classes
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)
    
    def forward(self, x):
        return self.model(x)


In [22]:
def load_datasets(dataset_name='MNIST'):
    """Load MNIST or FashionMNIST datasets"""
    transform = transforms.Compose([
        transforms.Resize((32, 32)),  # ResNet expects at least 32x32
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    
    if dataset_name == 'MNIST':
        dataset = torchvision.datasets.MNIST(
            root='./data', train=True, download=True, transform=transform
        )
        test_dataset = torchvision.datasets.MNIST(
            root='./data', train=False, download=True, transform=transform
        )
    else:  # FashionMNIST
        dataset = torchvision.datasets.FashionMNIST(
            root='./data', train=True, download=True, transform=transform
        )
        test_dataset = torchvision.datasets.FashionMNIST(
            root='./data', train=False, download=True, transform=transform
        )
    
    # Split train into train and validation (70%-10%-20%)
    train_size = int(0.7 * len(dataset))
    val_size = int(0.1 * len(dataset))
    test_size = len(dataset) - train_size - val_size
    
    train_dataset, val_dataset, _ = random_split(
        dataset, [train_size, val_size, test_size]
    )
    
    return train_dataset, val_dataset, test_dataset

In [23]:
def train_epoch(model, train_loader, optimizer, criterion, device, scaler, use_amp=True):
    """Train for one epoch"""
    model.train()
    running_loss = 0.0
    all_preds = []
    all_labels = []
    
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        
        optimizer.zero_grad()
        
        if use_amp:
            with autocast():
                output = model(data)
                loss = criterion(output, target)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        
        running_loss += loss.item()
        _, preds = torch.max(output, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(target.cpu().numpy())
    
    accuracy = accuracy_score(all_labels, all_preds)
    return running_loss / len(train_loader), accuracy

In [24]:

def validate(model, val_loader, criterion, device, use_amp=True):
    """Validate the model"""
    model.eval()
    val_loss = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            
            if use_amp:
                with autocast():
                    output = model(data)
                    loss = criterion(output, target)
            else:
                output = model(data)
                loss = criterion(output, target)
            
            val_loss += loss.item()
            _, preds = torch.max(output, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(target.cpu().numpy())
    
    accuracy = accuracy_score(all_labels, all_preds)
    return val_loss / len(val_loader), accuracy

In [25]:
def test(model, test_loader, device, use_amp=True):
    """Test the model"""
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            
            if use_amp:
                with autocast():
                    output = model(data)
            else:
                output = model(data)
            
            _, preds = torch.max(output, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(target.cpu().numpy())
    
    accuracy = accuracy_score(all_labels, all_preds)
    return accuracy


In [26]:
def run_experiment(dataset_name, model_type, batch_size, optimizer_name, lr, 
                   pin_memory=False, epochs=10, use_amp=True):
    """Run a complete experiment"""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    # Load datasets
    train_dataset, val_dataset, test_dataset = load_datasets(dataset_name)
    
    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, 
                             pin_memory=pin_memory, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                           pin_memory=pin_memory, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                            pin_memory=pin_memory, num_workers=2)
    
    # Create model
    model = ResNetMNIST(model_type=model_type, num_classes=10).to(device)
    
    # Define loss function
    criterion = nn.CrossEntropyLoss()
    
    # Define optimizer
    if optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    else:  # Adam
        optimizer = optim.Adam(model.parameters(), lr=lr)
    
    # Mixed precision training
    scaler = GradScaler() if use_amp else None
    
    # Training loop
    best_val_acc = 0
    train_times = []
    
    print(f"\nTraining {model_type} on {dataset_name}")
    print(f"Batch size: {batch_size}, Optimizer: {optimizer_name}, LR: {lr}")
    
    for epoch in range(epochs):
        start_time = time.time()
        
        # Train
        train_loss, train_acc = train_epoch(
            model, train_loader, optimizer, criterion, device, 
            scaler, use_amp
        )
        
        # Validate
        val_loss, val_acc = validate(model, val_loader, criterion, device, use_amp)
        
        epoch_time = time.time() - start_time
        train_times.append(epoch_time)
        
        print(f"Epoch {epoch+1}/{epochs}: "
              f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, "
              f"Time: {epoch_time:.2f}s")
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc
    
    # Test
    test_acc = test(model, test_loader, device, use_amp)
    avg_train_time = np.mean(train_times)
    
    print(f"Test Accuracy: {test_acc:.4f}")
    print(f"Average training time per epoch: {avg_train_time:.2f}s")
    
    return test_acc * 100, avg_train_time


In [27]:
# Run experiments for all hyperparameter combinations
def run_all_experiments(dataset_name='MNIST'):
    results = []
    
    # Hyperparameter combinations
    batch_sizes = [16, 32]
    optimizers = ['SGD', 'Adam']
    learning_rates = [0.001, 0.0001]
    models = ['resnet18', 'resnet50']
    epochs_list = [3, 5]  # Two different epoch counts
    
    for batch_size in batch_sizes:
        for optimizer_name in optimizers:
            for lr in learning_rates:
                for model_type in models:
                    for epochs in epochs_list:
                        # Test with pin_memory=True (better for GPU)
                        pin_memory = torch.cuda.is_available()
                        
                        print(f"\n{'='*60}")
                        print(f"Experiment: {model_type}, BS={batch_size}, "
                              f"Opt={optimizer_name}, LR={lr}, Epochs={epochs}")
                        print('='*60)
                        
                        try:
                            test_acc, train_time = run_experiment(
                                dataset_name=dataset_name,
                                model_type=model_type,
                                batch_size=batch_size,
                                optimizer_name=optimizer_name,
                                lr=lr,
                                pin_memory=pin_memory, 
                                epochs=epochs,
                                use_amp=True  # Constant as per requirements
                            )
                            
                            results.append({
                                'Dataset': dataset_name,
                                'Model': model_type,
                                'Batch_Size': batch_size,
                                'Optimizer': optimizer_name,
                                'Learning_Rate': lr,
                                'Epochs': epochs,
                                'Test_Accuracy_%': test_acc,
                                'Avg_Train_Time_s': train_time,
                                'Pin_Memory': pin_memory
                            })
                            
                        except Exception as e:
                            print(f"Error in experiment: {e}")
                            continue
    
    # Create results DataFrame
    df_results = pd.DataFrame(results)
    return df_results

In [None]:
# Run experiments for MNIST
print("Running experiments for MNIST dataset...")
mnist_results = run_all_experiments('MNIST')

# Run experiments for FashionMNIST
print("\n\nRunning experiments for FashionMNIST dataset...")
fashion_results = run_all_experiments('FashionMNIST')

# Combine results
all_results = pd.concat([mnist_results, fashion_results], ignore_index=True)

# Save results
all_results.to_csv('experiment_results.csv', index=False)

# Display results in table format
print("\n\nResults Summary:")
print("="*100)

Running experiments for MNIST dataset...

Experiment: resnet18, BS=16, Opt=SGD, LR=0.001, Epochs=3
Using device: cuda

Training resnet18 on MNIST
Batch size: 16, Optimizer: SGD, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.1805, Train Acc: 0.9437, Val Loss: 0.0571, Val Acc: 0.9833, Time: 37.09s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.0633, Train Acc: 0.9799, Val Loss: 0.0449, Val Acc: 0.9868, Time: 37.35s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.0380, Train Acc: 0.9885, Val Loss: 0.0473, Val Acc: 0.9857, Time: 37.17s


  with autocast():


Test Accuracy: 0.9881
Average training time per epoch: 37.20s

Experiment: resnet18, BS=16, Opt=SGD, LR=0.001, Epochs=5
Using device: cuda

Training resnet18 on MNIST
Batch size: 16, Optimizer: SGD, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.1838, Train Acc: 0.9442, Val Loss: 0.0467, Val Acc: 0.9855, Time: 37.62s


  with autocast():
  with autocast():


Epoch 2/5: Train Loss: 0.0616, Train Acc: 0.9810, Val Loss: 0.0389, Val Acc: 0.9902, Time: 36.97s


  with autocast():
  with autocast():


Epoch 3/5: Train Loss: 0.0391, Train Acc: 0.9875, Val Loss: 0.0279, Val Acc: 0.9918, Time: 37.65s


  with autocast():
  with autocast():


Epoch 4/5: Train Loss: 0.0265, Train Acc: 0.9917, Val Loss: 0.0240, Val Acc: 0.9932, Time: 36.97s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.0211, Train Acc: 0.9934, Val Loss: 0.0342, Val Acc: 0.9900, Time: 36.10s


  with autocast():


Test Accuracy: 0.9882
Average training time per epoch: 37.06s

Experiment: resnet50, BS=16, Opt=SGD, LR=0.001, Epochs=3
Using device: cuda





Training resnet50 on MNIST
Batch size: 16, Optimizer: SGD, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.4705, Train Acc: 0.8666, Val Loss: 0.0969, Val Acc: 0.9705, Time: 77.37s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.0943, Train Acc: 0.9729, Val Loss: 0.0529, Val Acc: 0.9848, Time: 77.79s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.0579, Train Acc: 0.9825, Val Loss: 0.0449, Val Acc: 0.9855, Time: 79.26s


  with autocast():


Test Accuracy: 0.9848
Average training time per epoch: 78.14s

Experiment: resnet50, BS=16, Opt=SGD, LR=0.001, Epochs=5
Using device: cuda





Training resnet50 on MNIST
Batch size: 16, Optimizer: SGD, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.4835, Train Acc: 0.8624, Val Loss: 0.0967, Val Acc: 0.9703, Time: 77.43s


  with autocast():
  with autocast():


Epoch 2/5: Train Loss: 0.1115, Train Acc: 0.9677, Val Loss: 0.0692, Val Acc: 0.9795, Time: 78.12s


  with autocast():
  with autocast():


Epoch 3/5: Train Loss: 0.0624, Train Acc: 0.9815, Val Loss: 0.0660, Val Acc: 0.9808, Time: 77.02s


  with autocast():
  with autocast():


Epoch 4/5: Train Loss: 0.0392, Train Acc: 0.9881, Val Loss: 0.0508, Val Acc: 0.9852, Time: 77.93s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.0319, Train Acc: 0.9907, Val Loss: 0.0469, Val Acc: 0.9865, Time: 77.06s


  with autocast():


Test Accuracy: 0.9870
Average training time per epoch: 77.51s

Experiment: resnet18, BS=16, Opt=SGD, LR=0.0001, Epochs=3
Using device: cuda

Training resnet18 on MNIST
Batch size: 16, Optimizer: SGD, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.3732, Train Acc: 0.8919, Val Loss: 0.1017, Val Acc: 0.9722, Time: 36.68s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.1198, Train Acc: 0.9647, Val Loss: 0.0700, Val Acc: 0.9802, Time: 36.20s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.0801, Train Acc: 0.9767, Val Loss: 0.0571, Val Acc: 0.9828, Time: 36.29s


  with autocast():


Test Accuracy: 0.9856
Average training time per epoch: 36.39s

Experiment: resnet18, BS=16, Opt=SGD, LR=0.0001, Epochs=5
Using device: cuda

Training resnet18 on MNIST
Batch size: 16, Optimizer: SGD, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.3620, Train Acc: 0.8976, Val Loss: 0.1122, Val Acc: 0.9680, Time: 36.98s


  with autocast():
  with autocast():


Epoch 2/5: Train Loss: 0.1179, Train Acc: 0.9648, Val Loss: 0.0771, Val Acc: 0.9767, Time: 36.54s


  with autocast():
  with autocast():


Epoch 3/5: Train Loss: 0.0776, Train Acc: 0.9772, Val Loss: 0.0666, Val Acc: 0.9813, Time: 36.95s


  with autocast():
  with autocast():


Epoch 4/5: Train Loss: 0.0565, Train Acc: 0.9828, Val Loss: 0.0641, Val Acc: 0.9803, Time: 36.91s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.0449, Train Acc: 0.9872, Val Loss: 0.0596, Val Acc: 0.9825, Time: 36.70s


  with autocast():


Test Accuracy: 0.9856
Average training time per epoch: 36.82s

Experiment: resnet50, BS=16, Opt=SGD, LR=0.0001, Epochs=3
Using device: cuda





Training resnet50 on MNIST
Batch size: 16, Optimizer: SGD, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.8844, Train Acc: 0.7071, Val Loss: 0.2533, Val Acc: 0.9220, Time: 77.00s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.2701, Train Acc: 0.9162, Val Loss: 0.1394, Val Acc: 0.9560, Time: 76.57s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.1761, Train Acc: 0.9453, Val Loss: 0.1042, Val Acc: 0.9682, Time: 76.71s


  with autocast():


Test Accuracy: 0.9698
Average training time per epoch: 76.76s

Experiment: resnet50, BS=16, Opt=SGD, LR=0.0001, Epochs=5
Using device: cuda





Training resnet50 on MNIST
Batch size: 16, Optimizer: SGD, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.8583, Train Acc: 0.7148, Val Loss: 0.2794, Val Acc: 0.9150, Time: 76.86s


  with autocast():
  with autocast():


Epoch 2/5: Train Loss: 0.2858, Train Acc: 0.9120, Val Loss: 0.1585, Val Acc: 0.9505, Time: 76.48s


  with autocast():
  with autocast():


Epoch 3/5: Train Loss: 0.1958, Train Acc: 0.9383, Val Loss: 0.1095, Val Acc: 0.9678, Time: 77.12s


  with autocast():
  with autocast():


Epoch 4/5: Train Loss: 0.1449, Train Acc: 0.9547, Val Loss: 0.0988, Val Acc: 0.9712, Time: 77.05s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.1192, Train Acc: 0.9634, Val Loss: 0.0881, Val Acc: 0.9748, Time: 77.74s


  with autocast():


Test Accuracy: 0.9761
Average training time per epoch: 77.05s

Experiment: resnet18, BS=16, Opt=Adam, LR=0.001, Epochs=3
Using device: cuda

Training resnet18 on MNIST
Batch size: 16, Optimizer: Adam, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.2011, Train Acc: 0.9420, Val Loss: 0.0805, Val Acc: 0.9737, Time: 42.57s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.0972, Train Acc: 0.9733, Val Loss: 0.0681, Val Acc: 0.9792, Time: 42.38s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.0763, Train Acc: 0.9792, Val Loss: 0.0731, Val Acc: 0.9793, Time: 42.68s


  with autocast():


Test Accuracy: 0.9825
Average training time per epoch: 42.54s

Experiment: resnet18, BS=16, Opt=Adam, LR=0.001, Epochs=5
Using device: cuda

Training resnet18 on MNIST
Batch size: 16, Optimizer: Adam, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.2035, Train Acc: 0.9406, Val Loss: 0.1260, Val Acc: 0.9667, Time: 42.08s


  with autocast():
  with autocast():


Epoch 2/5: Train Loss: 0.0967, Train Acc: 0.9725, Val Loss: 0.1254, Val Acc: 0.9705, Time: 43.32s


  with autocast():
  with autocast():


Epoch 3/5: Train Loss: 0.0730, Train Acc: 0.9797, Val Loss: 0.0552, Val Acc: 0.9833, Time: 42.76s


  with autocast():
  with autocast():


Epoch 4/5: Train Loss: 0.0554, Train Acc: 0.9849, Val Loss: 0.0550, Val Acc: 0.9852, Time: 41.91s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.0499, Train Acc: 0.9860, Val Loss: 0.0453, Val Acc: 0.9875, Time: 42.45s


  with autocast():


Test Accuracy: 0.9908
Average training time per epoch: 42.50s

Experiment: resnet50, BS=16, Opt=Adam, LR=0.001, Epochs=3
Using device: cuda





Training resnet50 on MNIST
Batch size: 16, Optimizer: Adam, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.4061, Train Acc: 0.8980, Val Loss: 0.1091, Val Acc: 0.9715, Time: 87.60s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.2537, Train Acc: 0.9444, Val Loss: 0.0840, Val Acc: 0.9770, Time: 87.20s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.2021, Train Acc: 0.9556, Val Loss: 0.7006, Val Acc: 0.8695, Time: 89.49s


  with autocast():


Test Accuracy: 0.8816
Average training time per epoch: 88.10s

Experiment: resnet50, BS=16, Opt=Adam, LR=0.001, Epochs=5
Using device: cuda





Training resnet50 on MNIST
Batch size: 16, Optimizer: Adam, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.4259, Train Acc: 0.8912, Val Loss: 0.1087, Val Acc: 0.9710, Time: 87.47s


  with autocast():
  with autocast():


Epoch 2/5: Train Loss: 0.2612, Train Acc: 0.9424, Val Loss: 0.0825, Val Acc: 0.9763, Time: 88.85s


  with autocast():
  with autocast():


Epoch 3/5: Train Loss: 0.2131, Train Acc: 0.9497, Val Loss: 0.1059, Val Acc: 0.9723, Time: 90.64s


  with autocast():
  with autocast():


Epoch 4/5: Train Loss: 0.1289, Train Acc: 0.9701, Val Loss: 0.0824, Val Acc: 0.9817, Time: 88.45s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.1277, Train Acc: 0.9692, Val Loss: 0.0794, Val Acc: 0.9792, Time: 89.61s


  with autocast():


Test Accuracy: 0.9786
Average training time per epoch: 89.00s

Experiment: resnet18, BS=16, Opt=Adam, LR=0.0001, Epochs=3
Using device: cuda

Training resnet18 on MNIST
Batch size: 16, Optimizer: Adam, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.2124, Train Acc: 0.9344, Val Loss: 0.0597, Val Acc: 0.9823, Time: 44.62s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.0806, Train Acc: 0.9750, Val Loss: 0.0560, Val Acc: 0.9820, Time: 43.39s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.0584, Train Acc: 0.9821, Val Loss: 0.0423, Val Acc: 0.9873, Time: 43.99s


  with autocast():


Test Accuracy: 0.9890
Average training time per epoch: 44.00s

Experiment: resnet18, BS=16, Opt=Adam, LR=0.0001, Epochs=5
Using device: cuda

Training resnet18 on MNIST
Batch size: 16, Optimizer: Adam, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.1995, Train Acc: 0.9394, Val Loss: 0.0703, Val Acc: 0.9790, Time: 42.44s


  with autocast():
  with autocast():


Epoch 2/5: Train Loss: 0.0783, Train Acc: 0.9761, Val Loss: 0.0558, Val Acc: 0.9830, Time: 43.05s


  with autocast():
  with autocast():


Epoch 3/5: Train Loss: 0.0538, Train Acc: 0.9835, Val Loss: 0.0548, Val Acc: 0.9860, Time: 43.17s


  with autocast():
  with autocast():


Epoch 4/5: Train Loss: 0.0419, Train Acc: 0.9873, Val Loss: 0.0578, Val Acc: 0.9822, Time: 43.25s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.0326, Train Acc: 0.9899, Val Loss: 0.0362, Val Acc: 0.9887, Time: 43.87s


  with autocast():


Test Accuracy: 0.9909
Average training time per epoch: 43.15s

Experiment: resnet50, BS=16, Opt=Adam, LR=0.0001, Epochs=3
Using device: cuda





Training resnet50 on MNIST
Batch size: 16, Optimizer: Adam, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.7504, Train Acc: 0.7516, Val Loss: 0.2387, Val Acc: 0.9260, Time: 89.72s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.2289, Train Acc: 0.9280, Val Loss: 0.1372, Val Acc: 0.9570, Time: 90.16s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.1346, Train Acc: 0.9586, Val Loss: 0.1043, Val Acc: 0.9682, Time: 89.63s


  with autocast():


Test Accuracy: 0.9707
Average training time per epoch: 89.84s

Experiment: resnet50, BS=16, Opt=Adam, LR=0.0001, Epochs=5
Using device: cuda





Training resnet50 on MNIST
Batch size: 16, Optimizer: Adam, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.7300, Train Acc: 0.7570, Val Loss: 0.2565, Val Acc: 0.9182, Time: 89.14s


  with autocast():
  with autocast():


Epoch 2/5: Train Loss: 0.2182, Train Acc: 0.9319, Val Loss: 0.1227, Val Acc: 0.9607, Time: 89.45s


  with autocast():
  with autocast():


Epoch 3/5: Train Loss: 0.1315, Train Acc: 0.9587, Val Loss: 0.1004, Val Acc: 0.9707, Time: 89.36s


  with autocast():
  with autocast():


Epoch 4/5: Train Loss: 0.0935, Train Acc: 0.9723, Val Loss: 0.0910, Val Acc: 0.9733, Time: 89.74s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.0722, Train Acc: 0.9795, Val Loss: 0.0589, Val Acc: 0.9825, Time: 89.70s


  with autocast():


Test Accuracy: 0.9835
Average training time per epoch: 89.48s

Experiment: resnet18, BS=32, Opt=SGD, LR=0.001, Epochs=3
Using device: cuda

Training resnet18 on MNIST
Batch size: 32, Optimizer: SGD, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.1758, Train Acc: 0.9461, Val Loss: 0.0611, Val Acc: 0.9818, Time: 20.81s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.0530, Train Acc: 0.9834, Val Loss: 0.0477, Val Acc: 0.9852, Time: 19.67s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.0311, Train Acc: 0.9900, Val Loss: 0.0417, Val Acc: 0.9880, Time: 19.46s


  with autocast():


Test Accuracy: 0.9876
Average training time per epoch: 19.98s

Experiment: resnet18, BS=32, Opt=SGD, LR=0.001, Epochs=5
Using device: cuda

Training resnet18 on MNIST
Batch size: 32, Optimizer: SGD, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.1730, Train Acc: 0.9477, Val Loss: 0.0603, Val Acc: 0.9803, Time: 19.54s


  with autocast():
  with autocast():


Epoch 2/5: Train Loss: 0.0569, Train Acc: 0.9828, Val Loss: 0.0429, Val Acc: 0.9862, Time: 20.24s


  with autocast():
  with autocast():


Epoch 3/5: Train Loss: 0.0342, Train Acc: 0.9888, Val Loss: 0.0440, Val Acc: 0.9853, Time: 19.32s


  with autocast():
  with autocast():


Epoch 4/5: Train Loss: 0.0200, Train Acc: 0.9937, Val Loss: 0.0360, Val Acc: 0.9878, Time: 19.87s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.0160, Train Acc: 0.9951, Val Loss: 0.0308, Val Acc: 0.9902, Time: 19.23s


  with autocast():


Test Accuracy: 0.9884
Average training time per epoch: 19.64s

Experiment: resnet50, BS=32, Opt=SGD, LR=0.001, Epochs=3
Using device: cuda





Training resnet50 on MNIST
Batch size: 32, Optimizer: SGD, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.4383, Train Acc: 0.8624, Val Loss: 0.1329, Val Acc: 0.9597, Time: 39.45s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.1133, Train Acc: 0.9657, Val Loss: 0.0764, Val Acc: 0.9780, Time: 40.30s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.0688, Train Acc: 0.9790, Val Loss: 0.0606, Val Acc: 0.9822, Time: 40.35s


  with autocast():


Test Accuracy: 0.9819
Average training time per epoch: 40.03s

Experiment: resnet50, BS=32, Opt=SGD, LR=0.001, Epochs=5
Using device: cuda





Training resnet50 on MNIST
Batch size: 32, Optimizer: SGD, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.4086, Train Acc: 0.8705, Val Loss: 0.1204, Val Acc: 0.9635, Time: 39.07s


  with autocast():
  with autocast():


Epoch 2/5: Train Loss: 0.1099, Train Acc: 0.9656, Val Loss: 0.0721, Val Acc: 0.9800, Time: 39.24s


  with autocast():
  with autocast():


Epoch 3/5: Train Loss: 0.0690, Train Acc: 0.9780, Val Loss: 0.0560, Val Acc: 0.9837, Time: 39.65s


  with autocast():
  with autocast():


Epoch 4/5: Train Loss: 0.0387, Train Acc: 0.9875, Val Loss: 0.0560, Val Acc: 0.9853, Time: 38.83s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.0308, Train Acc: 0.9902, Val Loss: 0.0540, Val Acc: 0.9868, Time: 39.02s


  with autocast():


Test Accuracy: 0.9844
Average training time per epoch: 39.16s

Experiment: resnet18, BS=32, Opt=SGD, LR=0.0001, Epochs=3
Using device: cuda

Training resnet18 on MNIST
Batch size: 32, Optimizer: SGD, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.4704, Train Acc: 0.8705, Val Loss: 0.1618, Val Acc: 0.9573, Time: 20.81s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.1425, Train Acc: 0.9588, Val Loss: 0.1019, Val Acc: 0.9683, Time: 19.87s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.0936, Train Acc: 0.9740, Val Loss: 0.0821, Val Acc: 0.9752, Time: 20.14s


  with autocast():


Test Accuracy: 0.9771
Average training time per epoch: 20.28s

Experiment: resnet18, BS=32, Opt=SGD, LR=0.0001, Epochs=5
Using device: cuda

Training resnet18 on MNIST
Batch size: 32, Optimizer: SGD, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.4634, Train Acc: 0.8751, Val Loss: 0.1602, Val Acc: 0.9543, Time: 19.63s


  with autocast():
  with autocast():


Epoch 2/5: Train Loss: 0.1383, Train Acc: 0.9605, Val Loss: 0.1057, Val Acc: 0.9692, Time: 19.69s


  with autocast():
  with autocast():


Epoch 3/5: Train Loss: 0.0919, Train Acc: 0.9737, Val Loss: 0.0848, Val Acc: 0.9742, Time: 20.52s


  with autocast():
  with autocast():


Epoch 4/5: Train Loss: 0.0670, Train Acc: 0.9806, Val Loss: 0.0739, Val Acc: 0.9775, Time: 20.10s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.0530, Train Acc: 0.9857, Val Loss: 0.0677, Val Acc: 0.9788, Time: 19.69s


  with autocast():


Test Accuracy: 0.9801
Average training time per epoch: 19.93s

Experiment: resnet50, BS=32, Opt=SGD, LR=0.0001, Epochs=3
Using device: cuda





Training resnet50 on MNIST
Batch size: 32, Optimizer: SGD, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.1727, Train Acc: 0.9485, Val Loss: 0.0737, Val Acc: 0.9783, Time: 22.93s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.0789, Train Acc: 0.9780, Val Loss: 0.0606, Val Acc: 0.9833, Time: 22.87s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.0608, Train Acc: 0.9829, Val Loss: 0.0504, Val Acc: 0.9858, Time: 22.76s


  with autocast():


Test Accuracy: 0.9863
Average training time per epoch: 22.85s

Experiment: resnet18, BS=32, Opt=Adam, LR=0.001, Epochs=5
Using device: cuda

Training resnet18 on MNIST
Batch size: 32, Optimizer: Adam, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.1710, Train Acc: 0.9500, Val Loss: 0.0854, Val Acc: 0.9743, Time: 22.58s


  with autocast():
  with autocast():


Epoch 2/5: Train Loss: 0.0786, Train Acc: 0.9780, Val Loss: 0.0464, Val Acc: 0.9853, Time: 23.34s


  with autocast():
  with autocast():


Epoch 3/5: Train Loss: 0.0611, Train Acc: 0.9819, Val Loss: 0.0502, Val Acc: 0.9858, Time: 22.85s


  with autocast():
  with autocast():


Epoch 4/5: Train Loss: 0.0530, Train Acc: 0.9847, Val Loss: 0.0490, Val Acc: 0.9862, Time: 23.39s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.0427, Train Acc: 0.9878, Val Loss: 0.0459, Val Acc: 0.9880, Time: 22.97s


  with autocast():


Test Accuracy: 0.9885
Average training time per epoch: 23.02s

Experiment: resnet50, BS=32, Opt=Adam, LR=0.001, Epochs=3
Using device: cuda





Training resnet50 on MNIST
Batch size: 32, Optimizer: Adam, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.3372, Train Acc: 0.9082, Val Loss: 0.1057, Val Acc: 0.9688, Time: 44.76s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.1504, Train Acc: 0.9626, Val Loss: 0.0986, Val Acc: 0.9742, Time: 45.44s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.1818, Train Acc: 0.9584, Val Loss: 0.1182, Val Acc: 0.9700, Time: 46.49s


  with autocast():


Test Accuracy: 0.9744
Average training time per epoch: 45.56s

Experiment: resnet50, BS=32, Opt=Adam, LR=0.001, Epochs=5
Using device: cuda





Training resnet50 on MNIST
Batch size: 32, Optimizer: Adam, LR: 0.001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.3475, Train Acc: 0.9054, Val Loss: 0.2585, Val Acc: 0.9392, Time: 45.62s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.3057, Train Acc: 0.8873, Val Loss: 0.3182, Val Acc: 0.8892, Time: 77.42s


  with autocast():


Test Accuracy: 0.8826
Average training time per epoch: 77.45s

Experiment: resnet18, BS=16, Opt=SGD, LR=0.0001, Epochs=3
Using device: cuda





Training resnet18 on FashionMNIST
Batch size: 16, Optimizer: SGD, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 0.6366, Train Acc: 0.7818, Val Loss: 0.4049, Val Acc: 0.8497, Time: 37.70s


  with autocast():
  with autocast():


Epoch 2/3: Train Loss: 0.4114, Train Acc: 0.8530, Val Loss: 0.3505, Val Acc: 0.8693, Time: 36.69s


  with autocast():
  with autocast():


Epoch 3/3: Train Loss: 0.3518, Train Acc: 0.8726, Val Loss: 0.3257, Val Acc: 0.8807, Time: 37.00s


  with autocast():


Test Accuracy: 0.8699
Average training time per epoch: 37.13s

Experiment: resnet18, BS=16, Opt=SGD, LR=0.0001, Epochs=5
Using device: cuda

Training resnet18 on FashionMNIST
Batch size: 16, Optimizer: SGD, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/5: Train Loss: 0.6360, Train Acc: 0.7822, Val Loss: 0.4176, Val Acc: 0.8462, Time: 36.98s


  with autocast():
  with autocast():


Epoch 2/5: Train Loss: 0.4132, Train Acc: 0.8530, Val Loss: 0.3649, Val Acc: 0.8633, Time: 35.52s


  with autocast():
  with autocast():


Epoch 3/5: Train Loss: 0.3528, Train Acc: 0.8730, Val Loss: 0.3456, Val Acc: 0.8725, Time: 36.27s


  with autocast():
  with autocast():


Epoch 4/5: Train Loss: 0.3052, Train Acc: 0.8898, Val Loss: 0.3330, Val Acc: 0.8743, Time: 38.24s


  with autocast():
  with autocast():


Epoch 5/5: Train Loss: 0.2742, Train Acc: 0.9014, Val Loss: 0.3229, Val Acc: 0.8813, Time: 37.73s


  with autocast():


Test Accuracy: 0.8823
Average training time per epoch: 36.95s

Experiment: resnet50, BS=16, Opt=SGD, LR=0.0001, Epochs=3
Using device: cuda





Training resnet50 on FashionMNIST
Batch size: 16, Optimizer: SGD, LR: 0.0001


  scaler = GradScaler() if use_amp else None
  with autocast():
  with autocast():


Epoch 1/3: Train Loss: 1.0910, Train Acc: 0.6015, Val Loss: 0.6210, Val Acc: 0.7712, Time: 77.58s


  with autocast():


In [9]:
print(mnist_results)

[{'Dataset': 'MNIST', 'Model': 'resnet18', 'Batch_Size': 16, 'Optimizer': 'SGD', 'Learning_Rate': 0.001, 'Epochs': 3, 'Test_Accuracy_%': 98.81, 'Avg_Train_Time_s': 37.2}, {'Dataset': 'MNIST', 'Model': 'resnet18', 'Batch_Size': 16, 'Optimizer': 'SGD', 'Learning_Rate': 0.001, 'Epochs': 5, 'Test_Accuracy_%': 98.82, 'Avg_Train_Time_s': 37.06}, {'Dataset': 'MNIST', 'Model': 'resnet50', 'Batch_Size': 16, 'Optimizer': 'SGD', 'Learning_Rate': 0.001, 'Epochs': 3, 'Test_Accuracy_%': 98.48, 'Avg_Train_Time_s': 78.14}, {'Dataset': 'MNIST', 'Model': 'resnet50', 'Batch_Size': 16, 'Optimizer': 'SGD', 'Learning_Rate': 0.001, 'Epochs': 5, 'Test_Accuracy_%': 98.7, 'Avg_Train_Time_s': 77.51}, {'Dataset': 'MNIST', 'Model': 'resnet18', 'Batch_Size': 16, 'Optimizer': 'SGD', 'Learning_Rate': 0.0001, 'Epochs': 3, 'Test_Accuracy_%': 98.56, 'Avg_Train_Time_s': 36.39}, {'Dataset': 'MNIST', 'Model': 'resnet18', 'Batch_Size': 16, 'Optimizer': 'SGD', 'Learning_Rate': 0.0001, 'Epochs': 5, 'Test_Accuracy_%': 98.56, '

In [10]:
import pandas as pd

# Convert list of dictionaries to DataFrame
df = pd.DataFrame(mnist_results)

# Display as a table
print(df)

   Dataset     Model  Batch_Size Optimizer  Learning_Rate  Epochs  \
0    MNIST  resnet18          16       SGD         0.0010       3   
1    MNIST  resnet18          16       SGD         0.0010       5   
2    MNIST  resnet50          16       SGD         0.0010       3   
3    MNIST  resnet50          16       SGD         0.0010       5   
4    MNIST  resnet18          16       SGD         0.0001       3   
5    MNIST  resnet18          16       SGD         0.0001       5   
6    MNIST  resnet50          16       SGD         0.0001       3   
7    MNIST  resnet50          16       SGD         0.0001       5   
8    MNIST  resnet18          16      Adam         0.0010       3   
9    MNIST  resnet18          16      Adam         0.0010       5   
10   MNIST  resnet50          16      Adam         0.0010       3   
11   MNIST  resnet50          16      Adam         0.0010       5   
12   MNIST  resnet18          16      Adam         0.0001       3   
13   MNIST  resnet18          16  

In [11]:
# Save to CSV
df.to_csv('mnist_results.csv', index=False)
print("Saved to 'mnist_results.csv'")

Saved to 'mnist_results.csv'
