In [1]:
import os 
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets,transforms

# Setting the device for training

In [2]:
device=(
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"We are using the device {device}")

We are using the device cpu


# Defining the class

In [3]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten=nn.Flatten()
        self.linear_relu_stack=nn.Sequential(
            nn.Linear(28*28,512),
            nn.ReLU(),
            nn.Linear(512,512),
            nn.ReLU(),
            nn.Linear(512,10)
        )
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [4]:
model=NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
X=torch.rand(1,28,28,device=device)
logits=model(X)
pred_probab=nn.Softmax(dim=1)(logits)
y_pred=pred_probab.argmax(1)
print(f"the predicted classes by the model: {y_pred}")

the predicted classes by the model: tensor([4])


# Model Layers

In [6]:
input_img=torch.rand(3,28,28)
print(input_img.size())

torch.Size([3, 28, 28])


In [7]:
# initializing the flatten to convert the 2D images to a contiguous array of 784 pixel values
flatten=nn.Flatten()
flat_img=flatten(input_img)
print(flat_img.size())

torch.Size([3, 784])


In [8]:
# initializing the linear to apply linear transformation to the stored weights and biases
layer1=nn.Linear(in_features=28*28,out_features=28)
hidden1=layer1(flat_img)
print(hidden1.size())

torch.Size([3, 28])


In [9]:
# using the ReLU activation function 
print(f"Before ReLU: {hidden1}")
hidden1=nn.ReLU()(hidden1)
print(f"After ReLu: {hidden1}")

Before ReLU: tensor([[ 0.1728,  0.0321,  0.0117,  0.7715, -0.1290,  0.0736,  1.0881,  0.0333,
          0.0855, -0.4946, -0.0624,  0.2756,  0.1146, -0.1562,  0.4084, -0.1825,
         -0.6086,  0.4607,  0.3573, -0.2193,  0.0195,  0.6314,  0.3075,  0.2817,
         -0.3577, -0.0826,  0.4133,  0.1122],
        [ 0.2421,  0.2670,  0.6181,  0.5697, -0.2154,  0.0292,  0.9957,  0.0507,
          0.0700, -0.7504, -0.0544, -0.0091,  0.6214, -0.0546,  0.6406,  0.1060,
         -0.3754,  0.1878,  0.1841, -0.0844, -0.0084, -0.0283, -0.0081,  0.4920,
          0.1522, -0.2473,  0.0554,  0.2559],
        [ 0.2472,  0.6258,  0.4771,  0.3435, -0.3458,  0.0872,  1.0944, -0.1789,
          0.0547, -0.7236, -0.2326,  0.0127,  0.5704,  0.0384,  0.4851,  0.0887,
         -0.2893,  0.2811,  0.0530, -0.1615, -0.4130,  0.2187,  0.0735,  0.2289,
          0.0541, -0.2601, -0.2277,  0.1292]], grad_fn=<AddmmBackward0>)
After ReLu: tensor([[0.1728, 0.0321, 0.0117, 0.7715, 0.0000, 0.0736, 1.0881, 0.0333, 0.0855,


In [10]:
# Sequential creates layers in the oder defined with the specific order given
seq_modules=nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10)
)
input_image=torch.rand(3,28,28)
logits=seq_modules(input_image)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x28 and 20x10)

the output is an error because the matrices are of different shapes

In [11]:
# Softmax is scales the logits which were given as output for the image that was passed in the previous cell block and scales it to the range from 0 to 1
softmax=nn.Softmax(dim=1)
pred_probab=softmax(logits)

In [12]:
print(pred_probab)

tensor([[0.0993, 0.1026, 0.0986, 0.0923, 0.1054, 0.1022, 0.0989, 0.1004, 0.1019,
         0.0985]], grad_fn=<SoftmaxBackward0>)


# Model Parameters

the model layers in a neural network are associated with weights and biases that are to be optimised during training.

In [14]:
print(f"Model structure: {model}")
for name,param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values: {param[:2]}")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[-0.0153,  0.0321, -0.0287,  ..., -0.0119, -0.0295, -0.0073],
        [ 0.0214, -0.0202,  0.0072,  ...,  0.0236,  0.0216,  0.0257]],
       grad_fn=<SliceBackward0>)
Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: tensor([-0.0159, -0.0112], grad_fn=<SliceBackward0>)
Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[ 0.0274,  0.0007,  0.0294,  ...,  0.0332, -0.0262,  0.0015],
        [ 0.0219,  0.0282, -0.0425,  ..., -0.0243,  0.0078,  0.0234]],
       grad_fn=<SliceBackward0>)
Layer: linear_relu_stack.2.bias | Size: torch