In [11]:
# import
import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch import nn
import torch.nn.functional as F
from torch import optim

In [12]:
# Load both MNIST splits with identical settings; only the `train` flag differs.
# The generator yields the training split first, then the test split.
training_data, test_data = (
    datasets.MNIST(
        root="dataset",
        train=is_train_split,
        download=True,
        transform=transforms.ToTensor(),
    )
    for is_train_split in (True, False)
)

In [13]:
# Hyperparameters shared by the cells below.
input_size = 28 * 28      # pixels per image (28 rows x 28 columns)
num_classes = 10          # number of output scores per sample
learning_rate = 1e-3      # alternative value tried: 5e-4
batch_size = 64           # samples per batch drawn by the data loaders
num_epochs = 3            # full passes over the training loader

In [14]:
# Only the training split is shuffled. Accuracy does not depend on the order in
# which batches are visited, so the test loader keeps a fixed order.
train_loader = DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

In [15]:
# neural network model
class NeuralNetwork(nn.Module):
    """Small convolutional classifier for single-channel, square images.

    Two ``Conv2d -> ReLU -> MaxPool2d`` stages feed a two-layer fully
    connected head that emits one raw score per class (no softmax applied).

    Args:
        input_size: Number of pixels in one input image (``height * width``).
            Must be a perfect square with a side of at least 4, because the
            two pooling stages each halve the spatial size.
        num_classes: Number of scores produced for each sample.

    Raises:
        ValueError: If ``input_size`` is not the pixel count of a square
            image whose side is at least 4.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()

        # Recover the image side from the pixel count so the first linear
        # layer matches the flattened feature map instead of assuming 28x28.
        side = round(input_size ** 0.5) if input_size > 0 else 0
        if side * side != input_size or side < 4:
            raise ValueError(
                "input_size must be the pixel count of a square image "
                f"with a side of at least 4 (got {input_size})"
            )

        # The 3x3 convolutions use padding=1 / stride=1 and keep the spatial
        # size; each 2x2 pooling with stride 2 halves it (rounding down).
        pooled_side = side // 2 // 2

        self.network = nn.Sequential(
            # Stage 1: 1 -> 16 channels, then halve height and width.
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Stage 2: 16 -> 32 channels, then halve height and width again.
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Collapse (channels, height, width) into one feature vector.
            nn.Flatten(),

            # Classification head; 32 * 7 * 7 input features for 28x28 images.
            nn.Linear(32 * pooled_side * pooled_side, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return the per-class scores for a batch shaped (batch, 1, height, width)."""
        return self.network(x)

In [16]:
# Build the classifier from the notebook's hyperparameters.
model = NeuralNetwork(
    input_size=input_size,
    num_classes=num_classes,
)

In [17]:
# Cross-entropy loss on the model's class scores, minimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [18]:
%%time
for epoch in range(num_epochs):
    for images, labels in train_loader:
        # Start each step from clean gradients.
        optimizer.zero_grad()

        # Forward pass: score the batch and measure the loss against the labels.
        loss = criterion(model(images), labels)

        # Backward pass, then one optimizer update.
        loss.backward()
        optimizer.step()

CPU times: user 4min 21s, sys: 953 ms, total: 4min 22s
Wall time: 1min 6s


In [19]:
device = 'cpu'


def accuracy(loader, model):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The model is switched to evaluation mode and gradients are disabled while
    the batches are scored. On exit the model is put back into whichever mode
    (training or evaluation) it was in when the function was called.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        model: Module that maps a batch of inputs to per-class scores, with
            the classes along dimension 1.

    Returns:
        ``num_correct / num_samples`` as a float between 0 and 1.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    num_correct = 0
    num_samples = 0

    # Remember the caller's mode so an evaluation-mode model is not silently
    # flipped back to training mode when the function returns.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                # Predicted class = index of the highest score per sample.
                predictions = model(x).argmax(dim=1)
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)

    return num_correct / num_samples

In [20]:
# Report accuracy as a percentage for each split, training first.
train_accuracy = accuracy(train_loader, model)
print(f'Training Accuracy: {train_accuracy*100:.2f}')
test_accuracy = accuracy(test_loader, model)
print(f'Testing Accuracy: {test_accuracy*100:.2f}')

Training Accuracy: 98.92
Testing Accuracy: 98.73
