Step-0 Setup

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import os
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


Step-1 Download Dataset and wrap-up to data loader.

In [18]:
Batch_Size = 100
transform = transforms.Compose([
    transforms.ToTensor(), # tensor
    transforms.Normalize(mean=(0.3081, ), std=(0.3081, )) # normalize
])

train = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform
)

test = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform
)

train_loader = DataLoader(train, batch_size=Batch_Size, shuffle=True)
test_loader = DataLoader(test, batch_size=Batch_Size, shuffle=True)


Step-2 : Sequential Approach

In [20]:
model_seq = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28*28, 500),
    nn.ReLU(),
    nn.Linear(500, 250),
    nn.ReLU(),
    nn.Linear(250, 10)
).to(device)

print(model_seq)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=500, bias=True)
  (2): ReLU()
  (3): Linear(in_features=500, out_features=250, bias=True)
  (4): ReLU()
  (5): Linear(in_features=250, out_features=10, bias=True)
)


In [21]:
# loss and optimizers
criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model_seq.parameters())

In [22]:
# Training loop
Epochs=10
def train(model, data_loader):
    model.train()
    for images, labels in data_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward Pass : initialize the optimizer
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Back-Propagation : update parameters
        loss.backward()
        optimizer.step()
    return loss

# lets class the training loop for the model
for epoch in range(Epochs):
    train(model_seq, train_loader)
    print(f"Epoch : {epoch + 1}")

Epoch : 1
Epoch : 2
Epoch : 3
Epoch : 4
Epoch : 5
Epoch : 6
Epoch : 7
Epoch : 8
Epoch : 9
Epoch : 10


In [23]:
# Evaluating the trained model onto the test dataset
def evaluation(model, data_loader):
    model.eval()
    correct = 0
    total=0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total
accuracy = evaluation(model_seq, test_loader)
print(f"Models training accuracy : {accuracy}")

Models training accuracy : 0.9466


In [24]:
# Sub-Class API
class ANN(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28*28, 500)
        self.fc2 = nn.Linear(500, 250)
        self.fc3 = nn.Linear(250, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)
model = ANN().to(device)
print(model)

ANN(
  (fc1): Linear(in_features=784, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=250, bias=True)
  (fc3): Linear(in_features=250, out_features=10, bias=True)
)


In [25]:
import torch.nn.functional as F

class ANNFunctional(nn.Module):
    def __init__(self):
        super().__init__()
        self.w1 = nn.Linear(28*28, 500)
        self.w2 = nn.Linear(500, 250)
        self.w3 = nn.Linear(250, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = F.relu(self.w1(x))
        x = F.relu(self.w2(x))
        return F.softmax(self.w3(x), dim=1)
model = ANNFunctional().to(device)
print(model)

ANNFunctional(
  (w1): Linear(in_features=784, out_features=500, bias=True)
  (w2): Linear(in_features=500, out_features=250, bias=True)
  (w3): Linear(in_features=250, out_features=10, bias=True)
)


In [29]:
class DynamicANN(nn.Module):
    def __init__(self, layer_sizes):
        super().__init__()
        self.layers = nn.ModuleList(
            [nn.Linear(layer_sizes[i], layer_sizes[i+1])
             for i in range(len(layer_sizes)-1)]
        )

    def forward(self, x):
        x = x.view(x.size(0), -1)
        for layer in self.layers[:-1]:
            x = torch.relu(layer(x))
        return self.layers[-1](x)
model = DynamicANN(layer_sizes=[28*28, 500, 250, 10]).to(device)
print(model)

DynamicANN(
  (layers): ModuleList(
    (0): Linear(in_features=784, out_features=500, bias=True)
    (1): Linear(in_features=500, out_features=250, bias=True)
    (2): Linear(in_features=250, out_features=10, bias=True)
  )
)


In [30]:
class ManualANN(nn.Module):
    def __init__(self):
        super().__init__()
        self.W1 = nn.Parameter(torch.randn(784, 500))
        self.b1 = nn.Parameter(torch.zeros(500))
        self.W2 = nn.Parameter(torch.randn(500, 250))
        self.b2 = nn.Parameter(torch.zeros(250))
        self.W3 = nn.Parameter(torch.randn(250, 10))
        self.b3 = nn.Parameter(torch.zeros(10))

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = torch.relu(x @ self.W1 + self.b1)
        x = torch.relu(x @ self.W2 + self.b2)
        return x @ self.W3 + self.b3

model = ManualANN().to(device)
print(model)

ManualANN()
