In [1]:
# Neural networks are composed of layers/modules that perform operations on the data
# Fully connected networks of this kind are also called multi-layer perceptrons
# The torch.nn namespace provides all the building blocks needed to build a neural network

# Every module in PyTorch subclasses nn.Module. A neural network is itself a module and consists of other modules


import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# set device backend
# Pick the first accelerator that is actually available and fall back to the
# CPU otherwise, so the notebook still runs on machines without Apple MPS.
# On an MPS-only machine this still resolves to "mps".
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [3]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier mapping 28*28 = 784 input features to 10 outputs."""

    def __init__(self):
        super().__init__()
        # Merges every dimension after the first into a single feature axis.
        self.flatten = nn.Flatten()
        # Three linear layers with a ReLU after the first two; the last layer
        # has no activation, so its outputs are raw scores.
        hidden_units = 512
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features=28 * 28, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=10),
        )

    def forward(self, x):
        """Flatten ``x`` and return the 10 unnormalized scores (logits) per sample."""
        return self.linear_relu_stack(self.flatten(x))

In [4]:
# Build the network, then move its parameters onto the selected device
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [17]:
# Do not call the model's .forward() method directly; call the model object
# itself (i.e. go through its __call__ method) and pass it the input.
X = torch.rand((1, 28, 28), device=device)
logits = model(X)
# Normalize the scores across the class dimension, then take the most likely class
pred_prob = nn.functional.softmax(logits, dim=1)
pred_y = pred_prob.argmax(dim=1)
print("The predicted class is  ", pred_y)


The predicted class is   tensor([5], device='mps:0')


In [27]:
# Walk through the layers one at a time, starting from a random batch
# of shape (3, 1, 28, 28)
input_image = torch.rand((3, 1, 28, 28))
print(input_image.shape)

torch.Size([3, 1, 28, 28])


In [28]:
# Flatten keeps the first dimension and merges the remaining ones
# (1 * 28 * 28 -> 784 values per sample)
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)
flat_image.shape

torch.Size([3, 784])

In [29]:
# Linear layer projecting each 784-value row of flat_image onto 20 features
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
hidden1.size()

torch.Size([3, 20])

In [30]:
# Apply the rectified linear unit (ReLU) as the activation function to add
# non-linearity: negative entries become 0, the others pass through unchanged
print("Before RelU: ", hidden1)
hidden1 = nn.functional.relu(hidden1)
print("After RelU: ", hidden1)

Before RelU:  tensor([[-0.2121, -0.2561, -0.7692,  0.3023, -0.3522, -0.0836,  0.3412,  0.2685,
          0.3387, -0.3988,  0.5743, -0.1587,  0.5176,  0.5390,  0.0108, -0.4095,
          0.0025, -0.2150,  0.0141,  0.1514],
        [-0.1956, -0.4107, -0.7580,  0.2530, -0.4634, -0.3247,  0.1492,  0.1567,
          0.1700, -0.0902,  0.4843, -0.2355,  0.4493,  0.0800,  0.0553, -0.4699,
          0.3234, -0.4109, -0.2444,  0.1806],
        [-0.2139, -0.3127, -0.5455,  0.3695, -0.3329, -0.3019,  0.2150,  0.0539,
          0.2526, -0.2277,  0.1365, -0.3827,  0.4297,  0.2677, -0.0021, -0.6180,
          0.4580,  0.0589, -0.0829,  0.2305]], grad_fn=<AddmmBackward0>)
After RelU:  tensor([[0.0000, 0.0000, 0.0000, 0.3023, 0.0000, 0.0000, 0.3412, 0.2685, 0.3387,
         0.0000, 0.5743, 0.0000, 0.5176, 0.5390, 0.0108, 0.0000, 0.0025, 0.0000,
         0.0141, 0.1514],
        [0.0000, 0.0000, 0.0000, 0.2530, 0.0000, 0.0000, 0.1492, 0.1567, 0.1700,
         0.0000, 0.4843, 0.0000, 0.4493, 0.0800, 0.05

In [31]:
# nn.Sequential is an ordered container: the input is passed through the
# modules in the order they are listed
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=10),
)

# Fresh random batch of three 28x28 inputs, pushed through the whole chain
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

In [32]:
# Display the raw (unnormalized) outputs produced by seq_modules
logits

tensor([[ 0.1242, -0.0405, -0.0229,  0.3410, -0.0947, -0.2843, -0.1926, -0.1750,
         -0.3280,  0.0821],
        [ 0.2915,  0.0212,  0.0240,  0.3364, -0.0176, -0.1982, -0.2266, -0.2328,
         -0.2078,  0.0205],
        [ 0.1684, -0.0463, -0.0244,  0.3866, -0.1162, -0.2265, -0.1310, -0.1283,
         -0.3408,  0.0717]], grad_fn=<AddmmBackward0>)

In [40]:
# Softmax converts the logit output of our sequential model into a probability
# distribution: the logits themselves can be any real number, while the softmax
# outputs lie between 0 and 1 and sum to 1 along dim=1 (one row per sample)
softmax = nn.functional.softmax(logits, dim=1)
softmax[2]

tensor([0.1206, 0.0973, 0.0994, 0.1500, 0.0907, 0.0812, 0.0894, 0.0896, 0.0725,
        0.1094], grad_fn=<SelectBackward0>)

In [43]:
# Show the module tree, then for each learnable parameter print its name,
# its shape, and its first two entries along dim 0
print("Model Structure: ", model)
for name, param in model.named_parameters():
    print(
        "Layer: ", name,
        " | Size: ", param.shape,
        " | Values: ", param[:2],
    )

Model Structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
Layer:  linear_relu_stack.0.weight  | Size:  torch.Size([512, 784])  | Values:  tensor([[ 0.0184,  0.0117, -0.0349,  ..., -0.0113,  0.0041,  0.0209],
        [ 0.0076, -0.0162, -0.0311,  ..., -0.0312,  0.0323, -0.0155]],
       device='mps:0', grad_fn=<SliceBackward0>)
Layer:  linear_relu_stack.0.bias  | Size:  torch.Size([512])  | Values:  tensor([ 0.0147, -0.0301], device='mps:0', grad_fn=<SliceBackward0>)
Layer:  linear_relu_stack.2.weight  | Size:  torch.Size([512, 512])  | Values:  tensor([[ 0.0351, -0.0132, -0.0016,  ...,  0.0056, -0.0032,  0.0082],
        [-0.0202, -0.0065,  0.0396,  ..., -0.0412,  0.0006,  0.0392]],
       device='mps:0', grad_fn