# Neural Network MNIST Classifier V1.0

This notebook implements a neural network classifier for MNIST digit recognition using PyTorch.

## Features:
- Custom ImageClassifier with convolutional and fully connected layers
- Custom Dataset class for MNIST data loading
- Data augmentation capabilities
- Training and evaluation pipeline


In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import struct


In [None]:
# ImageClassifier Class - Fixed for MNIST (28x28 grayscale images)
class ImageClassifier(nn.Module):
    """Small CNN for 28x28 digit images (MNIST).

    Three conv -> ReLU -> max-pool stages extract features; a three-layer
    fully connected head with dropout maps them to raw class logits (no
    softmax is applied here).

    Args:
        num_classes (int): Number of output logits. Defaults to 10 (digits 0-9).
        in_channels (int): Number of input image channels. Defaults to 1
            (grayscale).

    Note:
        The first linear layer is sized for 28x28 inputs (128 * 3 * 3
        features after the three pooling stages); other spatial sizes will
        fail in the fully connected head.
    """

    def __init__(self, num_classes=10, in_channels=1):
        super().__init__()

        # Convolutional layers for feature extraction
        self.conv_layers = nn.Sequential(
            # First conv layer: in_channels -> 32 channels, 3x3 kernel
            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 28x28 -> 14x14

            # Second conv layer: 32 -> 64 channels, 3x3 kernel
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 14x14 -> 7x7

            # Third conv layer: 64 -> 128 channels, 3x3 kernel
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            # 7x7 -> 3x3: the pool floors the odd size (last row/column is
            # dropped); no padding is involved.
            nn.MaxPool2d(2),
        )

        # Fully connected layers for classification
        self.fc_layers = nn.Sequential(
            # Input size: 128 * 3 * 3 = 1152 (for 28x28 inputs after conv layers)
            nn.Linear(128 * 3 * 3, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x (torch.Tensor): Batch of shape (N, in_channels, 28, 28).

        Returns:
            torch.Tensor: Logits of shape (N, num_classes).
        """
        x = self.conv_layers(x)

        # torch.flatten works for non-contiguous activations too (e.g. a
        # channels_last memory format), where .view() would raise.
        x = torch.flatten(x, 1)

        return self.fc_layers(x)


In [None]:
# CustomDataset Class - Fixed implementation for MNIST
class CustomDataset(Dataset):
    """Dataset backed by a pair of uncompressed MNIST IDX files.

    Both files are read fully into memory at construction time. Images are
    kept as a ``(num_images, rows, cols)`` uint8 array; labels are kept as an
    int64 array so that batches collate to ``torch.int64`` tensors, which is
    the class-index dtype ``nn.CrossEntropyLoss`` requires.
    """

    # Magic numbers at the start of IDX files (big-endian uint32).
    _IMAGES_MAGIC = 2051  # 0x00000803: unsigned byte data, 3 dimensions
    _LABELS_MAGIC = 2049  # 0x00000801: unsigned byte data, 1 dimension

    def __init__(self, images_path, labels_path, transform=None):
        """
        Initialize the dataset with MNIST data files

        Args:
            images_path (str): Path to MNIST images file
            labels_path (str): Path to MNIST labels file
            transform (callable, optional): Optional transform to be applied on a sample

        Raises:
            ValueError: If either file has an unexpected header, its payload
                size disagrees with the header, or the image and label
                counts differ.
        """
        self.images_path = images_path
        self.labels_path = labels_path
        self.transform = transform

        # Load MNIST data
        self.images = self._load_images()
        self.labels = self._load_labels()

        # A mismatch would otherwise surface later as an IndexError (or as
        # silently unused labels) deep inside a DataLoader worker.
        if len(self.images) != len(self.labels):
            raise ValueError(
                f"Image/label count mismatch: {len(self.images)} images "
                f"vs {len(self.labels)} labels"
            )

        print(f"Loaded {len(self.images)} images and {len(self.labels)} labels")

    def _load_images(self):
        """Load MNIST images from binary file.

        Returns:
            np.ndarray: uint8 array of shape (num_images, rows, cols).

        Raises:
            ValueError: On a truncated header, wrong magic number, or a
                payload whose size does not match the header.
        """
        with open(self.images_path, 'rb') as f:
            header = f.read(16)
            if len(header) != 16:
                raise ValueError(f"{self.images_path}: truncated IDX image header")

            # Header: magic, image count, rows, cols (big-endian uint32 each)
            magic, num_images, rows, cols = struct.unpack('>IIII', header)
            if magic != self._IMAGES_MAGIC:
                raise ValueError(
                    f"{self.images_path}: bad magic number {magic}, "
                    f"expected {self._IMAGES_MAGIC} for an IDX image file"
                )

            pixels = np.frombuffer(f.read(), dtype=np.uint8)

        expected = num_images * rows * cols
        if pixels.size != expected:
            raise ValueError(
                f"{self.images_path}: header declares {expected} pixel bytes "
                f"but file contains {pixels.size}"
            )

        return pixels.reshape(num_images, rows, cols)

    def _load_labels(self):
        """Load MNIST labels from binary file.

        Returns:
            np.ndarray: int64 array of shape (num_labels,).

        Raises:
            ValueError: On a truncated header, wrong magic number, or a
                payload whose size does not match the header.
        """
        with open(self.labels_path, 'rb') as f:
            header = f.read(8)
            if len(header) != 8:
                raise ValueError(f"{self.labels_path}: truncated IDX label header")

            # Header: magic, label count (big-endian uint32 each)
            magic, num_labels = struct.unpack('>II', header)
            if magic != self._LABELS_MAGIC:
                raise ValueError(
                    f"{self.labels_path}: bad magic number {magic}, "
                    f"expected {self._LABELS_MAGIC} for an IDX label file"
                )

            labels = np.frombuffer(f.read(), dtype=np.uint8)

        if labels.size != num_labels:
            raise ValueError(
                f"{self.labels_path}: header declares {num_labels} labels "
                f"but file contains {labels.size}"
            )

        # Widen from uint8: uint8 scalars collate to a uint8 tensor, which
        # classification losses reject as a class-index target.
        return labels.astype(np.int64)

    def __len__(self):
        """Returns the total number of samples"""
        return len(self.images)

    def __getitem__(self, idx):
        """Generates one sample of data.

        Args:
            idx (int or torch.Tensor): Sample index.

        Returns:
            tuple: ``(image, label)`` where ``image`` is the output of
            ``transform`` (or of ``ToTensor`` when no transform was given)
            and ``label`` is an int64 scalar.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Get image and label
        image = self.images[idx]
        label = self.labels[idx]

        # Convert to PIL Image for transforms
        image = torchvision.transforms.ToPILImage()(image)

        # Apply transforms if provided
        if self.transform:
            image = self.transform(image)
        else:
            # Default transform: convert to a tensor only. No mean/std
            # normalization is applied on this path.
            image = transforms.ToTensor()(image)

        return image, label


In [None]:
# Data Augmentation Transforms
# Two pipelines: training applies random rotation/translation before tensor
# conversion; validation only converts and normalizes.

# Normalization statistics shared by both pipelines (MNIST normalization values)
mnist_mean, mnist_std = (0.1307,), (0.3081,)

train_transform = transforms.Compose(
    [
        # Augmentation: rotate by up to 10 degrees, then shift by up to 10%
        # of the image size along each axis.
        transforms.RandomRotation(10),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        # Tensor conversion followed by normalization
        transforms.ToTensor(),
        transforms.Normalize(mnist_mean, mnist_std),
    ]
)

val_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mnist_mean, mnist_std),
    ]
)

print("Data augmentation transforms defined:")
print("Training transforms:", train_transform)
print("Validation transforms:", val_transform)


In [None]:
# Load MNIST Dataset
# Directory holding the four raw (uncompressed) MNIST IDX files
data_dir = Path("data/MNIST/raw")

# Training split files
train_images_path = data_dir.joinpath("train-images-idx3-ubyte")
train_labels_path = data_dir.joinpath("train-labels-idx1-ubyte")

# Test split files
test_images_path = data_dir.joinpath("t10k-images-idx3-ubyte")
test_labels_path = data_dir.joinpath("t10k-labels-idx1-ubyte")

# Build the datasets: augmented pipeline for training, plain one for testing
print("Loading training dataset...")
train_dataset = CustomDataset(
    train_images_path, train_labels_path, transform=train_transform
)

print("Loading test dataset...")
test_dataset = CustomDataset(
    test_images_path, test_labels_path, transform=val_transform
)

# Wrap the datasets in loaders; only the training loader shuffles
batch_size = 64
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

print(f"Training batches: {len(train_loader)}")
print(f"Test batches: {len(test_loader)}")


In [None]:
# Initialize Model, Loss Function, and Optimizer
# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Build the network and move it to the selected device before the optimizer
# captures its parameters
model = ImageClassifier()
model = model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Parameter counts for the summary below
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(size for size, _ in param_sizes)
trainable_params = sum(size for size, needs_grad in param_sizes if needs_grad)

print("Model created successfully!")
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print("Model architecture:")
print(model)


In [None]:
# Training Function
def train_model(model, train_loader, criterion, optimizer, device, epoch):
    """Train the model for one epoch.

    Args:
        model (nn.Module): Network to train; switched to training mode.
        train_loader: Sized iterable of ``(data, target)`` batches.
        criterion (callable): Loss taking ``(outputs, target)``.
        optimizer (torch.optim.Optimizer): Optimizer over ``model``'s parameters.
        device (torch.device): Device each batch is moved to.
        epoch (int): Epoch number, used only in the progress output.

    Returns:
        tuple[float, float]: ``(epoch_loss, epoch_acc)`` where ``epoch_loss``
        is the mean of the per-batch loss values and ``epoch_acc`` is the
        percentage of correctly predicted samples. ``(0.0, 0.0)`` when the
        loader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # Class-index targets must be int64 for losses such as
        # nn.CrossEntropyLoss; widen narrower integer dtypes (e.g. uint8
        # labels read straight from a file). Floating-point targets are left
        # untouched.
        if not torch.is_floating_point(target):
            target = target.long()

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, target)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Statistics
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()

        # Print progress
        if batch_idx % 200 == 0:
            print(f'Epoch: {epoch}, Batch: {batch_idx}/{len(train_loader)}, '
                  f'Loss: {loss.item():.4f}, Accuracy: {100.*correct/total:.2f}%')

    # An empty loader would otherwise divide by zero below.
    if total == 0:
        return 0.0, 0.0

    epoch_loss = running_loss / len(train_loader)
    epoch_acc = 100. * correct / total

    return epoch_loss, epoch_acc


In [None]:
# Evaluation Function
def evaluate_model(model, test_loader, criterion, device):
    """Evaluate the model on test data.

    Runs under ``torch.no_grad()`` with the model switched to evaluation
    mode (it is left in evaluation mode on return).

    Args:
        model (nn.Module): Network to evaluate.
        test_loader: Sized iterable of ``(data, target)`` batches.
        criterion (callable): Loss taking ``(outputs, target)``.
        device (torch.device): Device each batch is moved to.

    Returns:
        tuple[float, float]: ``(test_loss, test_acc)`` where ``test_loss`` is
        the mean of the per-batch loss values and ``test_acc`` is the
        percentage of correctly predicted samples. ``(0.0, 0.0)`` when the
        loader yields no samples.
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)

            # Class-index targets must be int64 for losses such as
            # nn.CrossEntropyLoss; widen narrower integer dtypes (e.g. uint8
            # labels). Floating-point targets are left untouched.
            if not torch.is_floating_point(target):
                target = target.long()

            # Forward pass
            outputs = model(data)
            loss = criterion(outputs, target)

            # Statistics
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()

    # An empty loader would otherwise divide by zero below.
    if total == 0:
        return 0.0, 0.0

    test_loss = test_loss / len(test_loader)
    test_acc = 100. * correct / total

    return test_loss, test_acc


In [None]:
# Training Loop
# Each epoch trains on train_loader, evaluates on test_loader, and records the
# four metrics so they can be plotted and saved afterwards.
num_epochs = 5
train_losses, train_accuracies = [], []
test_losses, test_accuracies = [], []

banner = "=" * 50
print("Starting training...")
print(banner)

for epoch in range(num_epochs):
    epoch_number = epoch + 1  # 1-based number used in all output
    print(f"\nEpoch {epoch_number}/{num_epochs}")
    print("-" * 30)

    # One training pass followed by one evaluation pass
    train_loss, train_acc = train_model(
        model, train_loader, criterion, optimizer, device, epoch_number
    )
    test_loss, test_acc = evaluate_model(model, test_loader, criterion, device)

    # Record this epoch's metrics in their history lists
    for history, value in (
        (train_losses, train_loss),
        (train_accuracies, train_acc),
        (test_losses, test_loss),
        (test_accuracies, test_acc),
    ):
        history.append(value)

    # Per-epoch summary
    print(f"Epoch {epoch_number} Summary:")
    print(f"  Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%")
    print(f"  Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.2f}%")

print("\nTraining completed!")
print(banner)


In [None]:
# Plot Training Results
# Side-by-side panels: loss on the left, accuracy on the right, each showing
# the training curve in blue and the test curve in red.
plt.figure(figsize=(15, 5))

epoch_axis = range(1, num_epochs + 1)

# (title, y-axis label, training series, training legend, test series, test legend)
panels = (
    ('Model Loss', 'Loss',
     train_losses, 'Training Loss', test_losses, 'Test Loss'),
    ('Model Accuracy', 'Accuracy (%)',
     train_accuracies, 'Training Accuracy', test_accuracies, 'Test Accuracy'),
)

for position, panel in enumerate(panels, start=1):
    title, y_label, train_series, train_label, test_series, test_label = panel
    plt.subplot(1, 2, position)
    plt.plot(epoch_axis, train_series, 'b-', label=train_label)
    plt.plot(epoch_axis, test_series, 'r-', label=test_label)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()

# Headline numbers: best accuracies seen and the last epoch's losses
print("\nFinal Results:")
print(f"Best Training Accuracy: {max(train_accuracies):.2f}%")
print(f"Best Test Accuracy: {max(test_accuracies):.2f}%")
print(f"Final Training Loss: {train_losses[-1]:.4f}")
print(f"Final Test Loss: {test_losses[-1]:.4f}")


In [None]:
# Test Model on Sample Images
def visualize_predictions(model, test_loader, device, num_samples=8):
    """Visualize model predictions on sample test images.

    Takes the first batch from ``test_loader``, runs the model on it, and
    shows up to ``num_samples`` images titled with their true and predicted
    labels. The grid has at most four columns and as many rows as needed, so
    any ``num_samples`` works (the default of 8 gives a 2x4 grid).

    Args:
        model (nn.Module): Network to run; switched to evaluation mode.
        test_loader: Iterable of ``(images, labels)`` batches.
        device (torch.device): Device the batch is moved to for inference.
        num_samples (int): Maximum number of images to show. Capped at the
            batch size; nothing is drawn when the resulting count is zero
            or negative.
    """
    model.eval()

    # Get a batch of test data
    images, labels = next(iter(test_loader))
    images, labels = images.to(device), labels.to(device)

    # Get predictions
    with torch.no_grad():
        outputs = model(images)
        _, predicted = outputs.max(1)

    # Convert to CPU for visualization
    images = images.cpu()
    labels = labels.cpu()
    predicted = predicted.cpu()

    count = min(num_samples, len(images))
    if count <= 0:
        return

    # Size the grid from the sample count rather than assuming 2x4.
    n_cols = min(4, count)
    n_rows = -(-count // n_cols)  # ceiling division
    fig, axes = plt.subplots(
        n_rows, n_cols, figsize=(3 * n_cols, 3 * n_rows), squeeze=False
    )
    # squeeze=False keeps axes 2-D even for a single row or column
    axes = axes.ravel()

    for i in range(count):
        axes[i].imshow(images[i].squeeze(), cmap='gray')
        axes[i].set_title(f'True: {labels[i].item()}, Predicted: {predicted[i].item()}')
        axes[i].axis('off')

    # Hide grid cells left over when count is not a multiple of n_cols
    for ax in axes[count:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Visualize some predictions
# Plots the model's predictions for the first batch drawn from test_loader,
# using the function's default of 8 samples.
print("Sample predictions:")
visualize_predictions(model, test_loader, device)


In [None]:
# Save the trained model
# The checkpoint bundles the weights with the optimizer state and the metric
# history recorded during training.
model_save_path = "mnist_classifier_v1.0.pth"

checkpoint = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'train_losses': train_losses,
    'train_accuracies': train_accuracies,
    'test_losses': test_losses,
    'test_accuracies': test_accuracies,
    'num_epochs': num_epochs,
    'model_architecture': 'ImageClassifier',
}
torch.save(checkpoint, model_save_path)

print(f"Model saved to: {model_save_path}")
print("Model includes:")
for saved_item in (
    "Model weights",
    "Optimizer state",
    "Training history",
    "Model architecture info",
):
    print(f"- {saved_item}")
