# NN Implementation

In [None]:
import torch
# Layer widths of the hand-written network below.
n_input = 5    # features per input sample
n_hidden = 3   # units in the hidden layer
n_output = 1   # units in the output layer

In [None]:
## Seed the global RNG so that Restart & Run All reproduces the same tensors
torch.manual_seed(0)

## Initialize tensors for a single input sample and its target
x = torch.randn((1, n_input))
y = torch.randn((1, n_output))

## Initialize weight tensors
w1 = torch.randn(n_input, n_hidden)   # weights of the hidden layer
w2 = torch.randn(n_hidden, n_output)  # weights of the output layer

## Initialize bias tensors (one row each, same width as the layer they feed)
b1 = torch.randn((1, n_hidden))  # bias of the hidden layer
b2 = torch.randn((1, n_output))  # bias of the output layer

In [None]:
## sigmoid activation function using pytorch
def sigmoid_activation(z):
    """Apply the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: Tensor of pre-activation values.

    Returns:
        A tensor with the same shape as ``z``.
    """
    # Use the built-in kernel instead of spelling the formula out by hand:
    # the manual form materialises exp(-z), which overflows to inf for very
    # negative inputs before the division brings the result back to 0.
    return torch.sigmoid(z)

## Hidden layer: affine transform of the input, then the sigmoid
z1 = x.mm(w1) + b1
a1 = sigmoid_activation(z1)
## Output layer: affine transform of the hidden activations, then the sigmoid
z2 = a1.mm(w2) + b2
output = sigmoid_activation(z2)

In [None]:
# Signed prediction error (target minus prediction); despite its name this is
# not a scalar loss value. The backward-pass cell multiplies it into the deltas.
loss = torch.sub(y, output)
print(loss)

tensor([[-0.5710]])


In [None]:
## derivative of the sigmoid, expressed through the sigmoid's own output
def sigmoid_delta(x):
    """Return ``x * (1 - x)`` element-wise.

    This is the sigmoid's derivative when ``x`` is already a sigmoid output
    (an activation), which is how this notebook calls it; it is not the
    derivative evaluated at a raw pre-activation value.
    """
    complement = 1 - x
    return x * complement

## Slope of the sigmoid at the output and at the hidden activations
delta_output = sigmoid_delta(output)
delta_hidden = sigmoid_delta(a1)
## Propagate the error backwards, one layer at a time
d_outp = delta_output * loss       # error signal at the output layer
loss_h = d_outp.mm(w2.t())         # error carried back through w2 to the hidden layer
d_hidn = delta_hidden * loss_h     # error signal at the hidden layer

In [None]:
learning_rate = 0.1

# `loss` holds (target - prediction), so the deltas already point in the
# descent direction for the squared error; that is why the updates use `+=`.
w2 += torch.mm(a1.t(), d_outp) * learning_rate
w1 += torch.mm(x.t(), d_hidn) * learning_rate
# Reduce over the batch dimension only (dim 0) so each unit's bias receives
# its own delta. A bare .sum() collapses every unit into one scalar and would
# add that same number to all entries of the bias row.
b2 += d_outp.sum(dim=0, keepdim=True) * learning_rate
b1 += d_hidn.sum(dim=0, keepdim=True) * learning_rate

# Task: MNIST digit classification (classes 0–4)

In [None]:
# Remainder of 74 divided by 3 (evaluates to 2).
# NOTE(review): scratch arithmetic; presumably picks an assignment variant
# from the number 74 — confirm, or remove the cell.
74%3

2

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

In [None]:
# Official MNIST training split (downloaded into ./data on first use).
train_dataset = torchvision.datasets.MNIST(
    root="./data",
    train=True,
    download=True,
    transform=transform,
)
# Official MNIST test split, preprocessed with the same transform.
test_dataset = torchvision.datasets.MNIST(
    root="./data",
    train=False,
    download=True,
    transform=transform,
)

In [None]:
# Sanity check: number of samples in the full MNIST training split.
len(train_dataset)

60000

In [None]:
# Sanity check: number of samples in the full MNIST test split.
len(test_dataset)

10000

In [None]:
# Digit classes kept for this task; every other label is dropped.
selected_classes = {0, 1, 2, 3, 4}

# Read the labels straight from `targets` instead of iterating the dataset:
# indexing the dataset decodes and transforms every image, which is wasted
# work when only the label is needed.
train_indices = [
    i
    for i, label in enumerate(train_dataset.targets.tolist())
    if label in selected_classes
]

# View onto the training split restricted to the selected classes.
filtered_train_dataset = Subset(train_dataset, train_indices)

In [None]:
# Sanity check: number of training samples whose label is in selected_classes.
len(filtered_train_dataset)

30596

In [None]:
# Read the labels straight from `targets` instead of iterating the dataset:
# indexing the dataset decodes and transforms every image, which is wasted
# work when only the label is needed.
test_indices = [
    i
    for i, label in enumerate(test_dataset.targets.tolist())
    if label in selected_classes
]

# View onto the test split restricted to the selected classes.
filtered_test_dataset = Subset(test_dataset, test_indices)

In [None]:
# Sanity check: number of test samples whose label is in selected_classes.
len(filtered_test_dataset)

5139

In [None]:
# Rebuild the complete filtered subset from its indices instead of reading
# `filtered_train_dataset`: this cell overwrites that name, so re-running the
# cell would otherwise split an already-split (80%) subset again.
all_filtered_train = Subset(train_dataset, train_indices)

# 80% for training, the remainder for validation.
train_size = int(0.8 * len(all_filtered_train))
val_size = len(all_filtered_train) - train_size

# A dedicated seeded generator makes the split membership reproducible
# without touching the global RNG state.
filtered_train_dataset, filtered_val_dataset = torch.utils.data.random_split(
    all_filtered_train,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

In [None]:
# Mini-batches of 64 samples. Only the training loader shuffles; validation
# and test batches are served in a fixed order.
train_loader = DataLoader(
    dataset=filtered_train_dataset,
    batch_size=64,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=filtered_val_dataset,
    batch_size=64,
    shuffle=False,
)
test_loader = DataLoader(
    dataset=filtered_test_dataset,
    batch_size=64,
    shuffle=False,
)

In [None]:
# Remainder of 74 divided by 1000 (evaluates to 74), the same number used as
# the layer width in TinyModel.
# NOTE(review): presumably this is how that width was chosen — confirm.
74%1000

74

In [None]:
class TinyModel(torch.nn.Module):
    """Fully connected classifier: flatten, four sigmoid layers, linear logits.

    Args:
        in_features: Number of values per flattened input sample
            (default ``28 * 28``).
        hidden_size: Width of each of the four sigmoid layers (default 74).
        num_classes: Number of output logits (default 10).

    The defaults reproduce the original fixed architecture, so ``TinyModel()``
    builds the same layers under the same attribute names as before.
    """

    def __init__(self, in_features=28 * 28, hidden_size=74, num_classes=10):
        super().__init__()
        self.flatten = torch.nn.Flatten()
        self.linear1 = torch.nn.Linear(in_features, hidden_size)   # Input Layer
        self.linear2 = torch.nn.Linear(hidden_size, hidden_size)   # Hidden Layer
        self.linear3 = torch.nn.Linear(hidden_size, hidden_size)   # Hidden Layer
        self.linear4 = torch.nn.Linear(hidden_size, hidden_size)   # Hidden Layer
        self.linear5 = torch.nn.Linear(hidden_size, num_classes)   # Output Layer
        self.activation = torch.nn.Sigmoid()
        # Not applied in forward(): the model returns raw logits. The attribute
        # is kept so existing code that reads `model.softmax` keeps working.
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
        """Return raw class scores (logits), one row per input sample.

        Args:
            x: Batch of inputs; everything after the batch dimension is
                flattened before the first linear layer.
        """
        x = self.flatten(x)
        for layer in (self.linear1, self.linear2, self.linear3, self.linear4):
            x = self.activation(layer(x))
        # No activation on the last layer: the output is unnormalised scores.
        return self.linear5(x)

In [None]:
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the network and move its parameters to the chosen device.
model = TinyModel()
model = model.to(device)

# Cross-entropy on the model's raw outputs, optimised with Adam.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Number of full passes over train_loader performed by the training loop.
num_epochs = 10

In [None]:
def _accuracy(model, loader, device):
    """Return the percentage of samples in ``loader`` that ``model`` gets right.

    The model is put in eval mode for the pass and restored to its previous
    mode afterwards; no gradients are recorded. An empty loader yields 0.0.
    """
    was_training = model.training
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            hits += (predicted == labels).sum().item()
            seen += labels.size(0)
    model.train(was_training)
    return 100 * hits / seen if seen else 0.0


for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Zero the parameter gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)

        # Calculate loss
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Statistics: predicted class = index of the largest logit. detach()
        # replaces the discouraged `.data` accessor for stepping out of autograd.
        running_loss += loss.item()
        predicted = outputs.detach().argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    train_acc = 100 * correct / total
    # The validation split was held out of training but never measured; report
    # it every epoch so over-fitting is visible before touching the test set.
    val_acc = _accuracy(model, val_loader, device)
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}, Train Accuracy: {train_acc:.2f}%")
    print(f"Epoch {epoch+1}, Val Accuracy: {val_acc:.2f}%")

Epoch 1, Loss: 1.1436, Train Accuracy: 51.87%
Epoch 2, Loss: 0.2635, Train Accuracy: 93.01%
Epoch 3, Loss: 0.1233, Train Accuracy: 97.00%
Epoch 4, Loss: 0.0955, Train Accuracy: 97.65%
Epoch 5, Loss: 0.0792, Train Accuracy: 98.02%
Epoch 6, Loss: 0.0662, Train Accuracy: 98.26%
Epoch 7, Loss: 0.0559, Train Accuracy: 98.57%
Epoch 8, Loss: 0.0503, Train Accuracy: 98.65%
Epoch 9, Loss: 0.0424, Train Accuracy: 98.89%
Epoch 10, Loss: 0.0388, Train Accuracy: 98.93%


In [None]:
# Final evaluation on the held-out test subset: eval mode, no gradient tracking.
model.eval()
test_correct, test_total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class is the index of the largest score in each row.
        predicted = torch.max(outputs, dim=1).indices
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

test_accuracy = 100 * test_correct / test_total
print(f"Test Accuracy: {test_accuracy:.2f}%")

Test Accuracy: 99.05%
