In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms

: 

In [None]:
class LeNet(nn.Module):
    """
    LeNet-style convolutional network for 10-class image classification.

    Layout: two 5x5 convolutions (1 -> 6 -> 16 channels), each followed by ReLU
    and 2x2 max pooling, then three fully connected layers
    (16*4*4 -> 120 -> 84 -> 10). The fully connected head is sized for
    single-channel 28x28 inputs (e.g. MNIST), for which the convolutional
    stack produces a 16x4x4 feature map.
    """

    def __init__(self):
        super().__init__()
        # Convolution 1: 1 input channel (grayscale), 6 output channels, 5x5 kernel
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)

        # Convolution 2: 6 input channels, 16 output channels, 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)

        # Fully connected 1: flattened 16*4*4 feature map -> 120 units
        self.fc1 = nn.Linear(16 * 4 * 4, 120)

        # Fully connected 2: 120 -> 84 units
        self.fc2 = nn.Linear(120, 84)

        # Output layer: 84 -> 10 class scores
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """
        Run the forward pass and return raw logits (no softmax applied).

        Args:
            x: Image tensor whose trailing dimensions are 1x28x28.

        Returns:
            Tensor of shape (N, 10) holding one row of class logits per sample.

        Raises:
            ValueError: If the convolutional stack does not produce a 16x4x4
                feature map, i.e. the input images are not 28x28.
        """
        # conv -> ReLU -> 2x2 max pool, twice
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)

        # Guard the flatten step: reshaping with -1 on a feature map of the wrong
        # size can silently regroup elements across samples (changing the batch
        # size) instead of failing, so reject unexpected shapes explicitly.
        feature_shape = tuple(x.shape[-3:])
        if feature_shape != (16, 4, 4):
            raise ValueError(
                "LeNet expects 1x28x28 inputs (16x4x4 feature map), but the "
                f"convolutional stack produced a feature map of shape {feature_shape}"
            )

        # Flatten each sample into a 256-element vector; reshape (rather than
        # view) also copes with non-contiguous memory layouts.
        x = x.reshape(-1, 16 * 4 * 4)

        # Two hidden fully connected layers with ReLU activations
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))

        # Output layer: raw logits, suitable for a cross-entropy loss
        return self.fc3(x)


In [None]:
# Preprocessing pipeline: convert each image to a tensor, then standardize it
# with mean 0.1307 and std 0.3081 (the standard MNIST statistics).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)


In [None]:
# MNIST training and test splits, stored under ./data (downloaded on request)
# and sharing the same preprocessing `transform`.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)


In [None]:
# Training batches: 64 samples each, shuffled.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# Evaluation batches: 1000 samples each, in fixed dataset order.
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

In [None]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the network and place its parameters on the selected device.
model = LeNet()
model = model.to(device)

# Classification loss: cross-entropy computed on the raw logits.
criterion = nn.CrossEntropyLoss()

# Adam optimizer over all model parameters, learning rate 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
def train(model, device, train_loader, optimizer, epoch, loss_fn=None, log_interval=100):
    """
    Train the model for one epoch.

    Args:
        model: Network to optimize; it is switched to training mode.
        device: torch.device that each batch is moved to.
        train_loader: DataLoader yielding (data, target) batches. Its
            ``dataset`` attribute is used only for the progress message.
        optimizer: Optimizer that updates the model parameters.
        epoch: Current epoch number (used only for logging).
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar loss.
            Defaults to the notebook-level ``criterion`` so existing calls
            behave exactly as before.
        log_interval: Print the loss every ``log_interval`` batches; a falsy
            value disables progress output.
    """
    if loss_fn is None:
        # Backward-compatible default: the loss object defined at notebook level.
        loss_fn = criterion

    model.train()  # enable training-mode behaviour (dropout, batch norm, ...)
    num_samples = len(train_loader.dataset)

    for batch_idx, (data, target) in enumerate(train_loader):
        # Move the batch to the device that holds the model
        data, target = data.to(device), target.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()

        # Periodic progress report
        if log_interval and batch_idx % log_interval == 0:
            print(f'Train Epoch: {epoch} [{batch_idx*len(data)}/{num_samples}] Loss: {loss.item():.6f}')



In [None]:
def test(model, device, test_loader):
    """
    Function to test the model's performance on the test dataset.
    - model: The CNN model
    - device: GPU or CPU
    - test_loader: DataLoader for the test dataset
    """
    model.eval()  # Set the model to evaluation mode
    test_loss = 0
    correct = 0
    with torch.no_grad():  # Disable gradient calculation for testing
        for data, target in test_loader:
            # Move data and target labels to the appropriate device
            data, target = data.to(device), target.to(device)
            
            # Forward pass: compute the model output
            output = model(data)
            
            # Compute the loss (sum up batch loss)
            test_loss += criterion(output, target).item()
            
            # Get the index of the max log-probability (predicted label)
            pred = output.argmax(dim=1, keepdim=True)
            
            # Count correct predictions
            correct += pred.eq(target.view_as(pred)).sum().item()
    
    # Compute average test loss and accuracy
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    
    # Print test results
    print(f'Test set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)\n')


In [None]:
# Ten epochs (numbered 1 through 10): one training pass over the training
# loader, then one evaluation pass over the test loader.
for epoch in range(1, 10 + 1):
    train(
        model=model,
        device=device,
        train_loader=train_loader,
        optimizer=optimizer,
        epoch=epoch,
    )
    test(model=model, device=device, test_loader=test_loader)