# EN3160 Assignment 3 on Neural Networks

Instructed by Dr. Ranga Rodrigo

Done by Jayakumar W.S. (210236P)

### Introduction

This assignment is focused on implementing neural networks for image classification. This is done by using:
1. Our own neural network implementation
2. An implementation of LeNet-5
3. An implementation of ResNet-18

### Import necessary libraries

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import gc

### Dataloading

In [3]:
# Convert each image to a tensor, then normalize every RGB channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)
batch_size = 32

# CIFAR-10 training split; batches are reshuffled on every pass.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# CIFAR-10 test split; iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# Human-readable class names, indexed by label.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device begin used : {device}")

Files already downloaded and verified
Files already downloaded and verified
Device begin used : cuda


### Our own architecture

#### Define Network Parameters

In [3]:
Din = 3 * 32 * 32  # Input size (flattened CIFAR-10 image: 3 channels x 32 x 32)
K = 10  # Output size (number of classes in CIFAR-10)
std = 1e-5  # Scale applied to the randomly initialized weights

# Initialize weights and biases.
# The training loop derives its gradients by hand, so autograd tracking is
# deliberately left off. Creating w with requires_grad=True and then scaling
# it by std would turn it into a non-leaf tensor that drags an autograd graph
# through every manual update.
w = torch.randn(Din, K, device=device, dtype=torch.float) * std
b = torch.randn(K, device=device, dtype=torch.float)

# Hyperparameters
iterations = 20  # Number of passes over the training set
lr = 2e-6  # Learning rate
lr_decay = 0.9  # Multiplicative learning rate decay applied after each pass
reg = 0  # L2 regularization strength
loss_history = []  # Per-batch loss values

In [4]:
# Train the single-layer linear classifier with mini-batch gradient descent.
#
# The gradients below are derived by hand, so the whole loop runs with autograd
# disabled. If w/b track gradients, each `w = w - lr * dw` would otherwise
# chain the new tensor onto the previous step's autograd graph, and memory use
# would grow with every iteration.
with torch.no_grad():
    for t in range(iterations):
        running_loss = 0.0
        for inputs, labels in trainloader:
            Ntr = inputs.shape[0]  # Batch size (the last batch may be smaller)
            x_train = inputs.view(Ntr, -1).to(device)  # Flatten input to (Ntr, Din)
            y_train_onehot = nn.functional.one_hot(labels, K).float().to(device)  # One-hot targets

            # Forward pass
            y_pred = x_train.mm(w) + b  # Output layer activation

            # Loss: squared error summed over classes and averaged over the
            # batch, plus L2 regularization on the weights.
            loss = (1 / Ntr) * torch.sum((y_pred - y_train_onehot) ** 2) + reg * torch.sum(w ** 2)
            loss_value = loss.item()  # Read the scalar once per batch
            loss_history.append(loss_value)
            running_loss += loss_value

            # Backpropagation (manual gradients of the loss above)
            dy_pred = (2.0 / Ntr) * (y_pred - y_train_onehot)
            # d/dw of reg * sum(w ** 2) is 2 * reg * w.
            dw = x_train.t().mm(dy_pred) + 2 * reg * w
            db = dy_pred.sum(dim=0)

            # Parameter update
            w = w - lr * dw
            b = b - lr * db

        print(f"Epoch {t + 1} / {iterations}, Loss: {running_loss / len(trainloader)}")

        # Learning rate decay
        lr *= lr_decay

Epoch 1 / 20, Loss: 15.62130003713753
Epoch 2 / 20, Loss: 13.63142822800122
Epoch 3 / 20, Loss: 12.764943606717726
Epoch 4 / 20, Loss: 12.269712213209937
Epoch 5 / 20, Loss: 11.938187017398086
Epoch 6 / 20, Loss: 11.688047737321714
Epoch 7 / 20, Loss: 11.494783885038135
Epoch 8 / 20, Loss: 11.33645681257974
Epoch 9 / 20, Loss: 11.20408751121028
Epoch 10 / 20, Loss: 11.09526105637895
Epoch 11 / 20, Loss: 11.000283862716177
Epoch 12 / 20, Loss: 10.91800398408642
Epoch 13 / 20, Loss: 10.848268118411093
Epoch 14 / 20, Loss: 10.787987606539149
Epoch 15 / 20, Loss: 10.734826364657549
Epoch 16 / 20, Loss: 10.687445311384634
Epoch 17 / 20, Loss: 10.644645016390164
Epoch 18 / 20, Loss: 10.6083836052102
Epoch 19 / 20, Loss: 10.574704309037612
Epoch 20 / 20, Loss: 10.545810999659796


In [5]:
# Drop every tensor left over from the single-layer experiment so its memory
# can be reclaimed before the next model is built.
del w, b
del x_train, y_train_onehot, y_pred
del loss, dy_pred, dw, db

# Collect the now-unreferenced objects, then release PyTorch's cached GPU blocks.
gc.collect()
torch.cuda.empty_cache()

In [6]:
# Two-layer network trained with hand-written backpropagation. This manual
# implementation is for educational purposes only; for real-world applications
# use PyTorch's built-in modules, losses and optimizers.
#
# The parameters are created WITHOUT requires_grad: the gradients are computed
# by hand, and with autograd tracking enabled every `w = w - lr * dw` update
# would extend the autograd graph, so memory usage would increase with the
# number of iterations.

Din = 3 * 32 * 32  # Input size (flattened CIFAR-10 image: 3 channels x 32 x 32)
K = 10  # Output size (number of classes in CIFAR-10)
# NOTE(review): std is defined but not applied to w1/w2 below -- confirm
# whether the initial weights were meant to be scaled by it.
std = 1e-5

# Initialize weights and biases (hidden layer of 100 units)
w1 = torch.randn(Din, 100, device=device)
b1 = torch.zeros(100, device=device)
w2 = torch.randn(100, K, device=device)
b2 = torch.zeros(K, device=device)

# Hyperparameters
iterations = 20  # Number of passes over the training set
lr = 2e-6  # Learning rate
lr_decay = 0.9  # Multiplicative learning rate decay applied after each pass
reg = 0  # L2 regularization strength
loss_history = []  # Per-batch loss values

#### Training loop

In [7]:
# Train the two-layer network with mini-batch gradient descent.
#
# The gradients below are derived by hand, so the whole loop runs with autograd
# disabled. If the parameters track gradients, each `w = w - lr * dw` would
# otherwise chain the new tensor onto the previous step's autograd graph, and
# memory use would grow with every iteration.
with torch.no_grad():
    for t in range(iterations):
        running_loss = 0.0
        for inputs, labels in trainloader:
            Ntr = inputs.shape[0]  # Batch size (the last batch may be smaller)
            x_train = inputs.view(Ntr, -1).to(device)  # Flatten input to (Ntr, Din)
            y_train_onehot = nn.functional.one_hot(labels, K).float().to(device)  # One-hot targets

            # Forward pass
            # NOTE(review): no nonlinearity is applied to `hidden`, so the two
            # layers compose into a single linear map -- confirm this is intended.
            hidden = x_train.mm(w1) + b1
            y_pred = hidden.mm(w2) + b2

            # Loss: squared error summed over classes and averaged over the
            # batch, plus L2 regularization on both weight matrices.
            loss = (1/Ntr) * torch.sum((y_pred - y_train_onehot) ** 2) + reg * (torch.sum(w1 ** 2) + torch.sum(w2 ** 2))
            loss_value = loss.item()  # Read the scalar once per batch
            loss_history.append(loss_value)
            running_loss += loss_value

            # Backpropagation (manual gradients of the loss above)
            dy_pred = (2.0 / Ntr) * (y_pred - y_train_onehot)
            dhidden = dy_pred.mm(w2.t())
            # d/dw of reg * sum(w ** 2) is 2 * reg * w.
            dw2 = hidden.t().mm(dy_pred) + 2 * reg * w2
            db2 = dy_pred.sum(dim=0)
            dw1 = x_train.t().mm(dhidden) + 2 * reg * w1
            db1 = dhidden.sum(dim=0)

            # Parameter update
            w2 = w2 - lr * dw2
            b2 = b2 - lr * db2
            w1 = w1 - lr * dw1
            b1 = b1 - lr * db1

        print(f"Epoch {t+1} / {iterations} , Loss : {running_loss/len(trainloader)}")

        # Learning rate decay
        lr *= lr_decay

Epoch 1 / 20 , Loss : 58862.28696067259
Epoch 2 / 20 , Loss : 11133.438430952096
Epoch 3 / 20 , Loss : 5069.310151688959
Epoch 4 / 20 , Loss : 2819.410859774872
Epoch 5 / 20 , Loss : 1754.4436296010276
Epoch 6 / 20 , Loss : 1178.6637748186206
Epoch 7 / 20 , Loss : 838.9237689642439
Epoch 8 / 20 , Loss : 624.9427198335595
Epoch 9 / 20 , Loss : 483.2508021066071
Epoch 10 / 20 , Loss : 385.50470730225703
Epoch 11 / 20 , Loss : 315.80564707574825
Epoch 12 / 20 , Loss : 264.682856671412
Epoch 13 / 20 , Loss : 226.30642076371498
Epoch 14 / 20 , Loss : 196.89464548102418
Epoch 15 / 20 , Loss : 173.9280351478933
Epoch 16 / 20 , Loss : 155.69256878814406
Epoch 17 / 20 , Loss : 141.04625540334905
Epoch 18 / 20 , Loss : 129.10847062631365
Epoch 19 / 20 , Loss : 119.32102659766062
Epoch 20 / 20 , Loss : 111.15241264396956


In [None]:
# Drop every tensor left over from the hand-written two-layer experiment so
# its memory can be reclaimed before the next model is built.
del w1, b1, w2, b2
del x_train, y_train_onehot, y_pred
del loss, dy_pred, dhidden
del dw2, db2, dw1, db1

# Collect the now-unreferenced objects, then release PyTorch's cached GPU blocks.
gc.collect()
torch.cuda.empty_cache()

In [4]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier with one ReLU hidden layer.

    Args:
        Din: Number of input features per sample.
        H: Number of hidden units.
        Dout: Number of output values per sample.
    """

    def __init__(self, Din, H, Dout):
        super().__init__()
        self.linear1 = nn.Linear(in_features=Din, out_features=H)
        self.linear2 = nn.Linear(in_features=H, out_features=Dout)

    def forward(self, x):
        """Return the raw outputs of the second linear layer for `x`.

        `x` is fed straight into the first linear layer, so its last
        dimension must equal `Din`; no flattening happens here.
        """
        hidden = torch.relu(self.linear1(x))
        return self.linear2(hidden)

In [7]:
# Two-layer MLP (100 hidden units) placed on the selected device.
model = NeuralNetwork(Din=Din, H=100, Dout=K)
model = model.to(device)

# Cross-entropy criterion applied to the model's raw outputs.
loss = nn.CrossEntropyLoss()

# NOTE: `lr` and `reg` are the module-level values; the earlier hand-written
# training loops decay `lr` in place (lr *= lr_decay), so the value used here
# depends on which cells ran before this one.
optimizer = optim.Adam(params=model.parameters(), lr=lr, weight_decay=reg)

In [None]:
# Train the MLP for `iterations` passes over the training set.
for t in range(iterations):
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        Ntr = inputs.shape[0]  # Batch size
        # The MLP expects flat vectors: reshape (Ntr, C, H, W) to (Ntr, Din).
        x_train = inputs.view(Ntr, -1).to(device)
        # Labels are passed as class indices (no one-hot encoding).
        y_train = labels.to(device)

        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass and loss
        y_pred = model(x_train)
        loss_val = loss(y_pred, y_train)

        # Record the scalar loss for this batch.
        batch_loss = loss_val.item()
        loss_history.append(batch_loss)
        running_loss += batch_loss

        # Backward pass and parameter update
        loss_val.backward()
        optimizer.step()

    print(f"Epoch {t + 1} / {iterations}, Loss: {running_loss / len(trainloader)}")

Epoch 1 / 20, Loss: 2.1878856887286546
Epoch 2 / 20, Loss: 2.049632305528442
Epoch 3 / 20, Loss: 1.97469479146861
Epoch 4 / 20, Loss: 1.9242408470091572
Epoch 5 / 20, Loss: 1.8867068309777835
Epoch 6 / 20, Loss: 1.857231892459452
Epoch 7 / 20, Loss: 1.833428738670935
Epoch 8 / 20, Loss: 1.813351829007735
Epoch 9 / 20, Loss: 1.7960078072563166
Epoch 10 / 20, Loss: 1.7810122685331757
Epoch 11 / 20, Loss: 1.7676157695852024
Epoch 12 / 20, Loss: 1.7555241500118643
Epoch 13 / 20, Loss: 1.7445906064331875
Epoch 14 / 20, Loss: 1.7345402882179997
Epoch 15 / 20, Loss: 1.725008711247435
Epoch 16 / 20, Loss: 1.7163191498355537
Epoch 17 / 20, Loss: 1.7079995072048135
Epoch 18 / 20, Loss: 1.7004681771486445
Epoch 19 / 20, Loss: 1.6931195985942946
Epoch 20 / 20, Loss: 1.6863281204390816


In [None]:
# Evaluate the MLP on the test set (top-1 accuracy).
accuracy = 0  # Running count of correctly classified test images
model.eval()
with torch.inference_mode():
    for inputs, labels in testloader:
        # The MLP's first layer is nn.Linear(Din, ...), so the image batch must
        # be flattened to (N, Din) exactly as in the training loop; feeding the
        # 4-D image tensor directly raises a shape-mismatch error.
        x_test = inputs.view(inputs.shape[0], -1).to(device)
        y_test = labels.to(device)
        y_pred = model(x_test)
        # Predicted class = index of the largest output per sample.
        predicted = torch.max(y_pred, dim=1).indices
        accuracy += (predicted == y_test).sum().item()

print(f"Accuracy: {accuracy / len(testset)}")

### LeNet-5

In [None]:
class LeNet(nn.Module):
    """LeNet-5 style network: two conv/ReLU/max-pool stages and a 3-layer MLP head.

    The first convolution takes 3 input channels and the final linear layer
    produces 10 outputs. The classifier head expects 16*5*5 features per
    sample from the convolutional stages.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 6 channels, 5x5 kernel, then 2x2 max pooling.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Stage 2: 6 -> 16 channels, 5x5 kernel, then 2x2 max pooling.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Head: 400 -> 120 -> 84 -> 10 with ReLU between the linear layers.
        self.classifier = nn.Sequential(
            nn.Linear(in_features=16 * 5 * 5, out_features=120),
            nn.ReLU(),
            nn.Linear(in_features=120, out_features=84),
            nn.ReLU(),
            nn.Linear(in_features=84, out_features=10),
        )

    def forward(self, x):
        """Return the raw 10-way outputs of the network for the image batch `x`."""
        features = self.conv1(x)
        features = self.conv2(features)
        # Collapse each sample's feature maps into one 400-element row.
        flat = features.view(-1, 16 * 5 * 5)
        y = self.classifier(flat)
        return y

In [11]:
# LeNet model placed on the selected device.
model = LeNet()
model = model.to(device)

# Cross-entropy criterion applied to the model's raw outputs.
loss = nn.CrossEntropyLoss()

# Adam with a fixed learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [12]:
# Train LeNet for `iterations` passes over the training set.
for t in range(iterations):
    model.train()
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Move the batch to the device; images keep their (N, C, H, W) layout.
        x_train = inputs.to(device)
        y_train = labels.to(device)

        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass and loss
        y_pred = model(x_train)
        loss_val = loss(y_pred, y_train)

        # Record the scalar loss for this batch.
        batch_loss = loss_val.item()
        loss_history.append(batch_loss)
        running_loss += batch_loss

        # Backward pass and parameter update
        loss_val.backward()
        optimizer.step()

    print(f"Epoch {t + 1} / {iterations}, Loss: {running_loss / len(trainloader)}")

Epoch 1 / 20, Loss: 1.6081523288158142
Epoch 2 / 20, Loss: 1.3175881011167248
Epoch 3 / 20, Loss: 1.1851489864628206
Epoch 4 / 20, Loss: 1.0968947963156306
Epoch 5 / 20, Loss: 1.0310549114235532
Epoch 6 / 20, Loss: 0.9732997963539851
Epoch 7 / 20, Loss: 0.9319621490616106
Epoch 8 / 20, Loss: 0.8904304512593507
Epoch 9 / 20, Loss: 0.853049688169915
Epoch 10 / 20, Loss: 0.8250834326368833
Epoch 11 / 20, Loss: 0.7981342941198453
Epoch 12 / 20, Loss: 0.7688879380413758
Epoch 13 / 20, Loss: 0.7420874635473856
Epoch 14 / 20, Loss: 0.7198617670372824
Epoch 15 / 20, Loss: 0.6975821146237415
Epoch 16 / 20, Loss: 0.6714369931799894
Epoch 17 / 20, Loss: 0.6597335995940619
Epoch 18 / 20, Loss: 0.6329375674670428
Epoch 19 / 20, Loss: 0.6182287512627155
Epoch 20 / 20, Loss: 0.5970457774644774


In [13]:
# Evaluate the trained model on the test set (top-1 accuracy).
model.eval()
accuracy = 0  # Running count of correctly classified test images
with torch.inference_mode():
    for inputs, labels in testloader:
        x_test = inputs.to(device)
        y_test = labels.to(device)
        y_pred = model(x_test)
        # Predicted class = index of the largest output per sample.
        predicted = torch.max(y_pred, dim=1).indices
        accuracy += (predicted == y_test).sum().item()

print(f"Accuracy: {accuracy / len(testset)}")

Accuracy: 0.6313
