In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

  warn(


In [2]:
# Choose the compute backend in order of preference:
# CUDA GPU first, then Apple-silicon MPS, falling back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


In [3]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two Linear+ReLU layers, Linear head.

    Args:
        in_features: Number of values per sample once every dimension after
            the batch dimension has been flattened. Defaults to ``28 * 28``.
        hidden_features: Width of both hidden layers. Defaults to ``512``.
        num_classes: Number of output logits per sample. Defaults to ``10``.

    Calling ``NeuralNetwork()`` with no arguments builds the same
    784 -> 512 -> 512 -> 10 stack as before; the arguments only exist so the
    same class can be reused for other input sizes or label counts.
    """

    def __init__(
        self,
        in_features: int = 28 * 28,
        hidden_features: int = 512,
        num_classes: int = 10,
    ):
        super().__init__()
        # Collapses every dimension after dim 0 into a single feature axis.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            # No activation on the last layer: forward() returns raw logits.
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` and run it through the stack.

        Args:
            x: Batch tensor whose non-batch dimensions multiply to
                ``in_features``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding unnormalized
            logits.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [4]:
# Build the network, then move its parameters to the selected device.
# Module.to() returns the module itself, so rebinding `model` is safe.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


### Model Layers


In [5]:
# Sample minibatch: three 28x28 "images" filled with uniform random values.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


### nn.Flatten
We initialize the nn.Flatten layer to convert each 2D 28x28 image into a contiguous array of 784 pixel values (the minibatch dimension, at dim=0, is maintained).

In [8]:
# Spell out nn.Flatten's defaults: keep dim 0 (the minibatch) and merge
# everything from dim 1 through the last dim into one axis.
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


### nn.Linear
The linear layer is a module that applies a linear transformation on the input using its stored weights and biases.

In [9]:
# Project each flattened 784-value image down to 20 hidden features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


### nn.ReLU
We use nn.ReLU between our linear layers to introduce non-linearity in the model, but there are other activation functions that can serve the same purpose.

In [11]:
# ReLU zeroes every negative entry and leaves non-negative entries unchanged.
# NOTE: `hidden1` is rebound to the activated tensor, so re-running this cell
# without re-running the nn.Linear cell shows already-activated values as
# "Before ReLU".
relu = nn.ReLU()
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = relu(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.2512, -0.0152, -0.2384,  0.0947, -0.1140,  0.3538, -0.2347,  0.2613,
          0.4636,  0.0229,  0.4673, -0.0371,  0.0564, -0.0284, -0.0146,  0.2626,
          0.1695, -0.1971,  0.3223,  0.1126],
        [-0.1948, -0.0068, -0.3568, -0.2343, -0.5468,  0.3599, -0.2094,  0.4979,
          0.3614, -0.3055,  0.8856, -0.2221,  0.3540,  0.0218,  0.2014, -0.0376,
          0.2093, -0.5265,  0.4734,  0.3846],
        [-0.1691,  0.0167, -0.3191, -0.3748, -0.2093,  0.1512, -0.1721,  0.2838,
          0.0703, -0.3429,  0.9064, -0.3148,  0.3736,  0.0491, -0.2410,  0.2093,
         -0.0198, -0.4621,  0.2385,  0.1343]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.0000, 0.0000, 0.0947, 0.0000, 0.3538, 0.0000, 0.2613, 0.4636,
         0.0229, 0.4673, 0.0000, 0.0564, 0.0000, 0.0000, 0.2626, 0.1695, 0.0000,
         0.3223, 0.1126],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.3599, 0.0000, 0.4979, 0.3614,
         0.0000, 0.8856, 0.0000, 0.3540, 0.0218, 0.20

### nn.Sequential
nn.Sequential is an ordered container of modules. The data is passed through all the modules in the same order as defined. You can use sequential containers to put together a quick network like seq_modules.

In [12]:
# Chain the modules built above with a fresh ReLU and a 20 -> 10 output layer;
# nn.Sequential calls them in exactly this order.
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=10),
)
# New random minibatch of three 28x28 inputs; the result has one row of
# 10 logits per input.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

### nn.Softmax
nn.Softmax rescales the logits to values in the range [0, 1] that sum to 1 along the specified dimension, so they can be read as the model's predicted probabilities for each class.

In [13]:
# dim=1 normalizes across the class axis, so each row of `pred_probab`
# (one row per input sample) sums to 1.
softmax: nn.Softmax = nn.Softmax(dim=1)
pred_probab: torch.Tensor = softmax(logits)

### Model Parameters

In [14]:
print(f"Model structure: {model}\n\n")

# named_parameters() yields (qualified name, parameter) pairs for every
# weight and bias registered on the model. Only the first two entries along
# dim 0 of each parameter are printed, to keep the output short.
for name, param in model.named_parameters():
    print(
        f"Layer: {name} | "
        f"Size: {param.size()} | "
        f"Values : {param[:2]} \n"
    )

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0301,  0.0009, -0.0090,  ..., -0.0051, -0.0186, -0.0291],
        [-0.0007,  0.0075, -0.0050,  ..., -0.0084,  0.0306,  0.0105]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([ 2.7689e-02, -7.1265e-06], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0037,  0.0346,  0.0160,  ..., -0.0205,  0.0255, -0.0338],
        [ 0.0316,  0.0344, -0.0191,  ...,  0.0017, -0.0065,  0.0041]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2