In [2]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [3]:
# Pick the compute backend in priority order: CUDA first, then Apple's MPS,
# and fall back to the CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [4]:
# MLP model
class NeuralNetwork(nn.Module):
    """Fully connected classifier (MLP) that maps a batch of inputs to raw logits.

    The network flattens every dimension after the first, then applies
    Linear -> ReLU -> Linear -> ReLU -> Linear. No softmax is applied, so the
    output values are unnormalised class scores.

    Args:
        in_features: Number of values per sample after flattening.
            Defaults to ``28 * 28``.
        hidden_features: Width of both hidden layers. Defaults to ``512``.
        num_classes: Number of output scores per sample. Defaults to ``10``.
    """

    def __init__(
        self,
        in_features: int = 28 * 28,
        hidden_features: int = 512,
        num_classes: int = 10,
    ) -> None:
        super().__init__()
        # Attribute names and the order inside the Sequential are kept stable
        # so parameter names (e.g. "linear_relu_stack.0.weight") do not change.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the logits for ``x``.

        Args:
            x: Batched input whose non-leading dimensions multiply out to
                ``in_features`` (for the defaults, e.g. ``(batch, 28, 28)``).

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding the raw scores.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
    

In [5]:
# Build the network with its default layer sizes, then move its parameters
# onto the selected device before printing the layer summary.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [6]:
# One random 28x28 sample (batch of size 1), created directly on the model's device.
X = torch.rand((1, 28, 28), device=device)

In [7]:
# Run a forward pass by calling the module itself; for the single sample in X
# this yields one row of 10 raw class scores.
logits = model(X)

In [8]:
# Display the raw (pre-softmax) class scores returned by the model.
logits

tensor([[-0.0200,  0.1579,  0.0189, -0.0603, -0.0167, -0.0257, -0.0297, -0.0579,
          0.0316, -0.0990]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [9]:
# Normalise the logits along the class dimension so each row sums to 1.
pred_probab = torch.softmax(logits, dim=1)

In [10]:
# Display the per-class probabilities computed from the logits.
pred_probab

tensor([[0.0988, 0.1180, 0.1027, 0.0949, 0.0991, 0.0982, 0.0978, 0.0951, 0.1040,
         0.0913]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [11]:
# The predicted class is the index of the largest probability in each row.
y_pred = pred_probab.argmax(dim=1)

In [12]:
# Display the predicted class index for each sample in the batch.
y_pred

tensor([1], device='cuda:0')

In [13]:
# A mini-batch of 3 random 28x28 images, used to walk through the layers one by one.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [16]:
# Keep the leading (batch) dimension and collapse everything after it, so each
# 28x28 image becomes a single row of 784 values.
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)

In [18]:
# Confirm the flattened shape: one 784-value row per image.
flat_image.shape

torch.Size([3, 784])

In [25]:
# Fully connected layer that projects each 784-value row to 20 hidden features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)

In [26]:
# Confirm the linear layer's output shape: 20 features per image.
hidden1.shape

torch.Size([3, 20])

In [27]:
# Print the activations before and after ReLU to show that negative values
# are clamped to zero while non-negative values pass through unchanged.
# Note: hidden1 is rebound here, so re-running this cell prints values that
# have already been through ReLU.
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = torch.relu(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-3.1420e-01, -2.1934e-01,  2.3417e-02, -1.0664e-01,  1.2381e-01,
         -2.6046e-01, -7.9166e-02,  5.5974e-01, -8.8430e-01, -5.6441e-01,
          1.8170e-04,  1.4090e-01, -1.3034e-01,  3.5827e-01, -1.8032e-01,
          8.5087e-02,  4.5412e-01,  9.5934e-01, -1.2918e-01,  7.2800e-02],
        [-2.8246e-02, -2.2572e-01,  1.6056e-01, -2.8384e-01,  1.1361e-01,
          3.9158e-01, -6.2789e-02,  5.6711e-01, -5.1137e-01, -5.1445e-01,
         -1.3543e-01,  9.2521e-02, -1.8562e-01, -1.7944e-03, -1.1422e-01,
          1.1782e-01,  3.7196e-01,  9.7672e-01, -6.5283e-02, -6.0924e-03],
        [-2.5965e-01,  7.2855e-02,  2.2354e-01, -3.7467e-02, -1.6223e-01,
         -1.0650e-01, -2.9107e-01,  2.9274e-01, -4.7454e-01, -3.4509e-01,
          1.1435e-02,  3.5857e-01, -6.4989e-02, -2.1011e-01, -3.0828e-01,
         -4.6498e-02,  4.8169e-01,  1.0026e+00, -3.6972e-02, -1.7675e-01]],
       grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000e+00, 0.0000e+00, 2.3417e-02, 0.0