In [None]:
# Optional setup (uncomment to run); the visible cells below do not import accelerate.
# %pip install -U accelerate

In [2]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [4]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Using {device} device")

Using cpu device


In [12]:
# We define our neural network by subclassing nn.Module, and initialize the neural network 
# layers in __init__. Every nn.Module subclass implements the operations on input data in the forward method.


class NeuralNetwork(nn.Module):
    """Fully connected classifier: 784 flattened inputs -> 512 -> 512 -> 10 logits."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # The position of each entry in this list becomes its index inside the
        # Sequential container (linear layers sit at 0, 2 and 4).
        stack_layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*stack_layers)

    def forward(self, x):
        """Flatten every dimension after the first and return the raw logits."""
        return self.linear_relu_stack(self.flatten(x))
    

In [13]:
# Create an instance of NeuralNetwork and move it to the selected device.
# (Its structure is displayed by the next cell.)
model = NeuralNetwork().to(device)

In [14]:
# Bare expression: the notebook shows the module's repr, which lists its sub-layers.
model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [None]:
# To use the model, we pass it the input data. This executes the model’s 
# forward, along with some background operations. Do not call model.forward() directly!

# Calling the model on the input returns a 2-dimensional tensor of shape (batch_size, 10):
# dim=0 indexes the samples in the batch, and dim=1 holds the 10 raw predicted
# values (logits), one per class, for each sample. We get the prediction
# probabilities by passing the logits through an instance of the nn.Softmax module.

In [15]:
# One random 28x28 input with a leading dimension of size 1, created directly on `device`.
X = torch.rand(1, 28, 28, device=device)

In [16]:
# Call the module itself (not model.forward) to run the forward pass.
logits = model(X)

In [17]:
# Show the raw, unnormalized outputs: one row of 10 values for the single input.
logits

tensor([[ 0.0147,  0.0983,  0.0975,  0.0458,  0.0370,  0.0234, -0.0896, -0.0331,
          0.0093, -0.0481]], grad_fn=<AddmmBackward0>)

In [18]:
# Normalize along dim=1 so the 10 class scores of each row sum to 1.
pred_probab = nn.Softmax(dim=1)(logits)

In [22]:
# Show the per-class probabilities produced by the softmax.
pred_probab

tensor([[0.0998, 0.1085, 0.1084, 0.1029, 0.1020, 0.1006, 0.0899, 0.0951, 0.0992,
         0.0937]], grad_fn=<SoftmaxBackward0>)

In [25]:
# Index of the largest probability along dim=1, i.e. the predicted class per row.
pred_probab.argmax(dim=1)

tensor([1])

In [26]:
# Same argmax as above, formatted into a message; the value is printed as a tensor.
print(f"Predicted class: {pred_probab.argmax(1)}")

Predicted class: tensor([1])


In [27]:
# Let's break down the layers in the FashionMNIST model.
# To illustrate it, we will take a sample minibatch of 3
# images of size 28x28 and see what happens to it as we pass it through the network.

# Random values stand in for real images here; the tensor has shape (3, 28, 28).
input_image = torch.rand(3, 28, 28)

In [28]:
# nn.Flatten: we initialize the nn.Flatten layer to convert each 2D 28x28 image
# into a contiguous array of 784 pixel values (the minibatch dimension at dim=0 is maintained).
# `flatten` is kept as a named layer because a later cell reuses it inside nn.Sequential.
flatten = nn.Flatten()
flat_image = flatten(input_image)

In [32]:
# Show the flattened minibatch: 3 rows of 784 values each.
flat_image

tensor([[0.4006, 0.2832, 0.3711,  ..., 0.1623, 0.2624, 0.7526],
        [0.3472, 0.2779, 0.1446,  ..., 0.3127, 0.1642, 0.9204],
        [0.9449, 0.7630, 0.7195,  ..., 0.0982, 0.4809, 0.5212]])

In [40]:
# nn.Linear: the linear layer is a module that applies a linear transformation
# to the input using its stored weights and biases.
# Here it maps each 784-value row to 20 features; `layer1` is reused by a later cell.
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)

In [41]:
# Show the linear layer's output before any activation: 3 rows of 20 values.
hidden1

tensor([[-0.2044,  0.1716, -0.2079,  0.2493, -0.0959, -0.3697, -0.2178,  0.4640,
         -0.2125,  0.3842,  0.5408, -0.0531, -0.1709,  0.1705,  0.5511,  0.3166,
         -0.3704,  0.6866,  0.3204,  0.4556],
        [ 0.0283,  0.3484,  0.1538,  0.2839, -0.4455,  0.0571, -0.4884,  0.5781,
          0.2348,  0.0044,  0.8396,  0.0811, -0.1592, -0.1072,  0.1215,  0.0431,
         -0.5144,  0.2720,  0.3181,  0.5103],
        [-0.2061,  0.2797, -0.2497,  0.4385, -0.4037, -0.1647, -0.1745,  0.3845,
         -0.2738,  0.2703,  0.7450,  0.0116, -0.3886, -0.3296,  0.3000,  0.0020,
         -0.2971,  0.3299,  0.3991,  0.2904]], grad_fn=<AddmmBackward0>)

In [42]:
# nn.ReLU:
# Non-linear activations are what create the complex mappings between the model's
# inputs and outputs. They are applied after linear transformations to introduce
# nonlinearity, helping neural networks learn a wide variety of phenomena.

# In this model, we use nn.ReLU between our linear layers, but there are other
# activations that can introduce non-linearity in your model.

# Note: this rebinds `hidden1` to the activated values, replacing the pre-activation tensor.
hidden1 = nn.ReLU()(hidden1)


In [43]:
# Show the activated values: entries that were negative before the ReLU are now zero.
hidden1

tensor([[0.0000, 0.1716, 0.0000, 0.2493, 0.0000, 0.0000, 0.0000, 0.4640, 0.0000,
         0.3842, 0.5408, 0.0000, 0.0000, 0.1705, 0.5511, 0.3166, 0.0000, 0.6866,
         0.3204, 0.4556],
        [0.0283, 0.3484, 0.1538, 0.2839, 0.0000, 0.0571, 0.0000, 0.5781, 0.2348,
         0.0044, 0.8396, 0.0811, 0.0000, 0.0000, 0.1215, 0.0431, 0.0000, 0.2720,
         0.3181, 0.5103],
        [0.0000, 0.2797, 0.0000, 0.4385, 0.0000, 0.0000, 0.0000, 0.3845, 0.0000,
         0.2703, 0.7450, 0.0116, 0.0000, 0.0000, 0.3000, 0.0020, 0.0000, 0.3299,
         0.3991, 0.2904]], grad_fn=<ReluBackward0>)

In [44]:
# nn.Sequential: nn.Sequential is an ordered container of modules.
# The data is passed through all the modules in the same order as defined.
# You can use sequential containers to put together a quick network like seq_modules.

# `flatten` and `layer1` are the objects created in the earlier cells (not fresh copies);
# only the ReLU and the final 20 -> 10 linear layer are new here.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))

In [45]:
# Rebinds X (previously the single-sample input) to a fresh random batch of three 28x28 inputs.
# No `device` argument is passed here, and seq_modules is not moved with .to(...) in the visible cells.
X = torch.rand(3, 28, 28)

In [46]:
# Rebinds `logits` to the output of seq_modules, replacing the value computed from `model`.
logits = seq_modules(X)

In [47]:
# Show the raw outputs of seq_modules: 3 rows of 10 values.
logits

tensor([[ 0.1716, -0.1598, -0.0588, -0.0139,  0.0119,  0.0295, -0.0887,  0.0615,
          0.2024, -0.4259],
        [ 0.1770, -0.1340, -0.1727,  0.0135,  0.0113,  0.0030, -0.0843,  0.1033,
          0.1761, -0.4408],
        [ 0.1998, -0.2518, -0.0946,  0.0586,  0.0856,  0.1007, -0.0135,  0.1168,
          0.2666, -0.4593]], grad_fn=<AddmmBackward0>)

In [50]:
# nn.Softmax: the last linear layer of the neural network returns logits - raw values
# in (-inf, inf) - which are passed to the nn.Softmax module. The logits are
# scaled to values in [0, 1] representing the model's predicted probabilities
# for each class. The dim parameter indicates the dimension along which the values must sum to 1.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

In [51]:
# Show the probabilities for the batch of three: 3 rows of 10 values.
pred_probab

tensor([[0.1203, 0.0864, 0.0956, 0.1000, 0.1026, 0.1044, 0.0927, 0.1078, 0.1241,
         0.0662],
        [0.1218, 0.0892, 0.0858, 0.1034, 0.1032, 0.1023, 0.0938, 0.1131, 0.1217,
         0.0657],
        [0.1196, 0.0761, 0.0891, 0.1038, 0.1067, 0.1083, 0.0966, 0.1101, 0.1278,
         0.0619]], grad_fn=<SoftmaxBackward0>)

In [53]:
# Formatting the module in an f-string uses its repr, which lists the contained layers in order.
print(f"Model's Structure: {seq_modules}")

Model's Structure: Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=20, bias=True)
  (2): ReLU()
  (3): Linear(in_features=20, out_features=10, bias=True)
)


In [56]:
# Print the model once, then one entry per learnable tensor.
# Interpolating the bound method `seq_modules.named_parameters` without calling it
# embeds the method's repr ("<bound method ...>") rather than the model structure,
# so the module itself is formatted here instead.
print(f"Model Structure: {seq_modules}")

# named_parameters() yields (name, tensor) pairs; each name is prefixed with the
# layer's position inside the Sequential container (e.g. "1.weight", "3.bias").
for name, param in seq_modules.named_parameters():
    # param[:2] keeps only the first two entries along dim 0 so the output stays short.
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model Structure: <bound method Module.named_parameters of Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=20, bias=True)
  (2): ReLU()
  (3): Linear(in_features=20, out_features=10, bias=True)
)>
Layer: 1.weight | Size: torch.Size([20, 784]) | Values : tensor([[-0.0023,  0.0238, -0.0104,  ..., -0.0167, -0.0289,  0.0037],
        [ 0.0250, -0.0104,  0.0095,  ...,  0.0038,  0.0012,  0.0181]],
       grad_fn=<SliceBackward0>) 

Layer: 1.bias | Size: torch.Size([20]) | Values : tensor([0.0179, 0.0346], grad_fn=<SliceBackward0>) 

Layer: 3.weight | Size: torch.Size([10, 20]) | Values : tensor([[-1.1528e-03,  1.2637e-01, -1.4101e-01, -1.5631e-03,  6.6131e-02,
          8.9990e-02,  9.4388e-02, -2.7156e-03, -1.8415e-02,  1.0227e-01,
          1.2607e-01, -1.8378e-01,  5.2246e-02,  1.8180e-01, -5.7845e-02,
         -9.2231e-02, -1.1765e-01,  2.1407e-01, -1.7229e-01,  5.8319e-02],
        [-2.2179e-01,  1.6645e-01,  6.2918e-02, -8.9028e-02,  3.038