In [7]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [22]:
import torch.nn as nn
import torch.nn.functional as F

DROP_OUT = 0.05

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        # Input Block
        self.convblock1 = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(DROP_OUT)
        )  # output_size = 26, receptive_field = 3, output_channels = 16

        # Convolution Block 1
        self.convblock2 = nn.Sequential(
            nn.Conv2d(16, 32, 3, padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Dropout(DROP_OUT)
        )  # output_size = 24, receptive_field = 5, output_channels = 32

        # Transition Block 1
        self.convblock3 = nn.Sequential(
            nn.Conv2d(32, 12, 1, padding=0, bias=False),
        )  # output_size = 24, receptive_field = 5, output_channels = 12
        self.pool1 = nn.MaxPool2d(2, 2)  # output_size = 12, receptive_field = 6, output_channels = 14

        # Convolution Block 2
        self.convblock4 = nn.Sequential(
            nn.Conv2d(12, 16, 3, padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(DROP_OUT)
        )  # output_size = 10, receptive_field = 10, output_channels = 16
        self.convblock5 = nn.Sequential(
            nn.Conv2d(16, 16, 3, padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(DROP_OUT)
        )  # output_size = 8, receptive_field = 14, output_channels = 16
        self.convblock6 = nn.Sequential(
            nn.Conv2d(16, 16, 3, padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(DROP_OUT)
        )  # output_size = 6, receptive_field = 18, output_channels = 16
        self.convblock7 = nn.Sequential(
            nn.Conv2d(16, 16, 3, padding=1, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(DROP_OUT)
        )  # output_size = 6, receptive_field = 22, output_channels = 16

        # Fully Connected Layer
        self.fc = nn.Linear(16 * 6 * 6, 10)  # 16 channels, 6x6 feature map

        self.dropout = nn.Dropout(DROP_OUT)

    def forward(self, x):
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.convblock3(x)
        x = self.pool1(x)
        x = self.convblock4(x)
        x = self.convblock5(x)
        x = self.convblock6(x)
        x = self.convblock7(x)
        x = x.view(-1, 16 * 6 * 6)  # Flatten the tensor
        x = self.fc(x)
        return F.log_softmax(x, dim=-1)

In [23]:
# !pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
              ReLU-2           [-1, 16, 26, 26]               0
       BatchNorm2d-3           [-1, 16, 26, 26]              32
           Dropout-4           [-1, 16, 26, 26]               0
            Conv2d-5           [-1, 32, 24, 24]           4,608
              ReLU-6           [-1, 32, 24, 24]               0
       BatchNorm2d-7           [-1, 32, 24, 24]              64
           Dropout-8           [-1, 32, 24, 24]               0
            Conv2d-9           [-1, 12, 24, 24]             384
        MaxPool2d-10           [-1, 12, 12, 12]               0
           Conv2d-11           [-1, 16, 10, 10]           1,728
             ReLU-12           [-1, 16, 10, 10]               0
      BatchNorm2d-13           [-1, 16, 10, 10]              32
          Dropout-14           [-1, 16,

In [24]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import numpy as np

def get_data_loaders(batch_size=64):

    train_transform=transforms.Compose([
                        transforms.RandomRotation((-5.0, 5.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    # Load full datasets
    train_dataset = datasets.MNIST('./data', train=True, download=True, transform=train_transform)
    test_dataset = datasets.MNIST('./data', train=False, transform=transform)

    # Create indices for 25% of training data
    # total_train = len(train_dataset)
    # indices = np.random.permutation(total_train)
    # train_size = int(0.25 * total_train)  # 25% of the data
    # train_indices = indices[:train_size]

    # Create subset of training data
    # train_dataset = Subset(train_dataset, train_indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # print(f"Training with {train_size:,} samples (25% of original {total_train:,} samples)")
    print(f"Training with {len(train_dataset)} samples")


    return train_loader, test_loader

In [33]:
import torch
import torch.nn as nn
import torch.optim as optim
from datetime import datetime
import logging
from pathlib import Path
import torch.nn.functional as F
import numpy as np
import random

def set_seed(seed=42):
    """Set seeds for reproducibility."""
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

def setup_logger():
    log_dir = Path('logs')
    log_dir.mkdir(exist_ok=True)

    logging.basicConfig(
        filename=f'logs/training_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log',
        level=logging.INFO,
        format='%(asctime)s - %(message)s'
    )

def get_device():
    if torch.backends.mps.is_available():
        device = torch.device("mps")
        device_name = "Apple Silicon (M1/M2)"
    elif torch.cuda.is_available():
        device = torch.device("cuda")
        device_name = f"CUDA ({torch.cuda.get_device_name(0)})"
    else:
        device = torch.device("cpu")
        device_name = "CPU"
    return device, device_name

def train(epochs=20, batch_size=64, learning_rate=0.01, target_accuracy=99.4):
    set_seed(42)  # Set seed for reproducibility
    setup_logger()

    # Device setup
    device, device_name = get_device()
    gpu_info = f"Using device: {device} ({device_name})"

    if device.type == 'cuda':
        gpu_info += f"\nMemory Usage:"
        gpu_info += f"\n  Allocated: {round(torch.cuda.memory_allocated(0)/1024**2,1)} MB"
        gpu_info += f"\n  Cached:    {round(torch.cuda.memory_reserved(0)/1024**2,1)} MB"

    print(gpu_info)
    # logging.info(gpu_info)

    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    train_loader, test_loader = get_data_loaders(batch_size)

    best_accuracy = 0.0
    early_stop = False

    for epoch in range(epochs):
        if early_stop:
            break

        model.train()
        train_loss = 0
        correct = 0
        total = 0

        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()

            # Print batch progress
            # if batch_idx % 100 == 0:
            #     print(f'Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '
            #           f'({100. * batch_idx / len(train_loader):.0f}%)]  Loss: {loss.item():.6f}')

        train_accuracy = 100. * correct / total
        train_loss = train_loss / len(train_loader)

        # Validation
        model.eval()
        val_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                val_loss += F.nll_loss(output, target, reduction='sum').item()
                _, predicted = output.max(1)
                total += target.size(0)
                correct += predicted.eq(target).sum().item()

        val_accuracy = 100. * correct / total
        val_loss = val_loss / len(test_loader)

        # Update best accuracy
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy

        # Check for early stopping
        if val_accuracy >= target_accuracy:
            early_stop = True
            print(f"\nReached target accuracy of {target_accuracy}% at epoch {epoch}")
            # logging.info(f"Reached target accuracy of {target_accuracy}% at epoch {epoch}")

        # Log epoch results
        log_message = (f'Epoch: {epoch} | '
                      f'Train Loss: {train_loss:.3f} | '
                      f'Train Acc: {train_accuracy:.2f}% | '
                      f'Val Loss: {val_loss:.3f} | '
                      f'Val Acc: {val_accuracy:.2f}% | '
                      f'Best Val Acc: {best_accuracy:.2f}%')
        # logging.info(log_message)
        print(log_message)

        # Log GPU memory only for CUDA devices
        if device.type == 'cuda':
            memory_info = (f"GPU Memory: "
                         f"Allocated: {round(torch.cuda.memory_allocated(0)/1024**2,1)} MB, "
                         f"Cached: {round(torch.cuda.memory_reserved(0)/1024**2,1)} MB")
            # logging.info(memory_info)
            print(memory_info)

    return best_accuracy

In [35]:
final_accuracy = train()
print(f"\nTraining completed. Best validation accuracy: {final_accuracy:.2f}%")

Using device: cuda (CUDA (Tesla T4))
Memory Usage:
  Allocated: 17.7 MB
  Cached:    120.0 MB
Training with 60000 samples
Epoch: 0 | Train Loss: 0.129 | Train Acc: 95.84% | Val Loss: 3.849 | Val Acc: 98.08% | Best Val Acc: 98.08%
GPU Memory: Allocated: 18.0 MB, Cached: 120.0 MB
Epoch: 1 | Train Loss: 0.051 | Train Acc: 98.37% | Val Loss: 2.142 | Val Acc: 98.90% | Best Val Acc: 98.90%
GPU Memory: Allocated: 18.0 MB, Cached: 120.0 MB
Epoch: 2 | Train Loss: 0.040 | Train Acc: 98.74% | Val Loss: 1.735 | Val Acc: 99.16% | Best Val Acc: 99.16%
GPU Memory: Allocated: 18.0 MB, Cached: 120.0 MB
Epoch: 3 | Train Loss: 0.035 | Train Acc: 98.89% | Val Loss: 1.746 | Val Acc: 99.08% | Best Val Acc: 99.16%
GPU Memory: Allocated: 18.0 MB, Cached: 120.0 MB
Epoch: 4 | Train Loss: 0.030 | Train Acc: 99.04% | Val Loss: 1.783 | Val Acc: 99.10% | Best Val Acc: 99.16%
GPU Memory: Allocated: 18.0 MB, Cached: 120.0 MB
Epoch: 5 | Train Loss: 0.027 | Train Acc: 99.15% | Val Loss: 1.539 | Val Acc: 99.28% | Best V