In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

import numpy as np
import pandas as pd


from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Convert MNIST image files into tensors of 4D. No of images, height, width and colour channels
transform = transforms.ToTensor()



In [5]:
train_data = datasets.MNIST(root = 'cnn_data',train=True,download=True,transform=transform)


In [6]:
test_data = datasets.MNIST(root='cnn_data',train=False,download=True,transform=transform)

In [7]:
train_loader = DataLoader(train_data,batch_size=10,shuffle=True)
test_loader = DataLoader(test_data,batch_size=10,shuffle=False)

In [8]:
class ConvolutionalNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,6,3,1)
        self.conv2 = nn.Conv2d(6,16,3,1)
        #Fully Connected layer
        self.fc1 = nn.Linear(400,120)
        self.fc2 = nn.Linear(120,84)
        self.fc3 = nn.Linear(84,10)
    #Forward Propagation
    def forward(self,X):
        #First pass
        X = F.relu(self.conv1(X))
        X = F.max_pool2d(X,2,2)
        #self Pass
        X = F.relu(self.conv2(X))
        X = F.max_pool2d(X,2,2)

        #Review to flatten
        X = X.view(-1,16*5*5)

        #Fully Connected Layer
        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = F.relu(self.fc3(X))

        return F.log_softmax(X,dim=1)

In [9]:
#Model Instance
torch.manual_seed(41)
model = ConvolutionalNetwork()
model

ConvolutionalNetwork(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [10]:
#Loss Function Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0000001)

In [11]:
import time
import torch

# ... (Assuming model, criterion, optimizer, train_loader, test_loader are defined)

start_time = time.time()

# Trackers
epochs = 30
train_losses = []
test_losses = []
train_correct = [] # Total correct per epoch
test_correct = []  # Total correct per epoch

for i in range(epochs):
    trn_corr = 0
    epoch_train_loss = 0 # New accumulator for training loss

    # --- TRAIN ---
    model.train() # Set model to training mode
    for b, (X_train, Y_train) in enumerate(train_loader):
        # 1. Forward pass
        y_pred = model(X_train)
        loss = criterion(y_pred, Y_train)
        
        # 2. Accumulate loss and accuracy
        epoch_train_loss += loss.item() # Add batch loss
        predicted = torch.max(y_pred.data, 1)[1]
        trn_corr += (predicted == Y_train).sum().item() # Accumulate correct predictions

        # 3. Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print batch progress (optional and kept from original logic)
        if (b + 1) % 600 == 0:
            print(f'Epoch: {i+1} Batch: {b+1} Loss: {loss.item()}')
    
    # 4. APPEND EPOCH-LEVEL TRAINING METRICS 
    train_losses.append(epoch_train_loss / len(train_loader)) # Average loss for the epoch
    train_correct.append(trn_corr) # Total correct for the epoch

    # --- TEST ---
    tst_corr = 0
    epoch_test_loss = 0 # New accumulator for test loss
    model.eval() # Set model to evaluation mode
    with torch.no_grad():
        for X_test, Y_test in test_loader: # Removed 'b' as it's not strictly needed here
            y_val = model(X_test)
            
            # 1. Accumulate loss and accuracy
            test_loss = criterion(y_val, Y_test)
            epoch_test_loss += test_loss.item() # Accumulate test batch loss
            
            predicted = torch.max(y_val.data, 1)[1]
            tst_corr += (predicted == Y_test).sum().item()
            
    # 2. APPEND EPOCH-LEVEL TEST METRICS 
    test_losses.append(epoch_test_loss / len(test_loader)) # Average loss for the epoch
    test_correct.append(tst_corr) # Total correct for the epoch
    
    # Optional: Print epoch summary
    print(f'Epoch {i+1} completed. Train Loss: {train_losses[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}')


current_time = time.time()
total = current_time - start_time
print(f'\nTraining Time: {total/60:.2f} Minutes!') # Use .2f for cleaner output

Epoch: 1 Batch: 600 Loss: 2.2946114540100098
Epoch: 1 Batch: 1200 Loss: 2.2969970703125
Epoch: 1 Batch: 1800 Loss: 2.2978458404541016
Epoch: 1 Batch: 2400 Loss: 2.2971012592315674
Epoch: 1 Batch: 3000 Loss: 2.300535202026367
Epoch: 1 Batch: 3600 Loss: 2.294329881668091
Epoch: 1 Batch: 4200 Loss: 2.3096816539764404
Epoch: 1 Batch: 4800 Loss: 2.300119400024414
Epoch: 1 Batch: 5400 Loss: 2.2951743602752686
Epoch: 1 Batch: 6000 Loss: 2.3093061447143555
Epoch 1 completed. Train Loss: 2.3026, Test Loss: 2.3022
Epoch: 2 Batch: 600 Loss: 2.304964542388916
Epoch: 2 Batch: 1200 Loss: 2.315521717071533
Epoch: 2 Batch: 1800 Loss: 2.2952239513397217
Epoch: 2 Batch: 2400 Loss: 2.3124935626983643
Epoch: 2 Batch: 3000 Loss: 2.308764934539795
Epoch: 2 Batch: 3600 Loss: 2.3129725456237793
Epoch: 2 Batch: 4200 Loss: 2.3105077743530273
Epoch: 2 Batch: 4800 Loss: 2.2981550693511963
Epoch: 2 Batch: 5400 Loss: 2.2996578216552734
Epoch: 2 Batch: 6000 Loss: 2.300868272781372
Epoch 2 completed. Train Loss: 2.30