In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cuda device


In [4]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [5]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


In [18]:
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([2], device='cuda:0')


In [19]:
# Broken down into bits

# Generates a 3 x 28 x 28 tensor, (each slice along first dim (dim 0) is new image)
input_image = torch.rand(3, 28, 28)
print(input_image.size())

torch.Size([3, 28, 28])


In [20]:
# nn.Flatten() takes the 28*28 image down to an array of (784) pixel values
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.size())

torch.Size([3, 784])


In [38]:
# nn.Linear is a linear layer wit h stored weights in biases
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.size())

torch.Size([3, 20])


In [39]:
# nn.ReLU adds the non-linear activation functions to create more complex mappings (like the sigmoid and others)
print(f'Before ReLU: {hidden1}\n\n')
hidden1 = nn.ReLU()(hidden1)
print(f'After ReLU: {hidden1}')

# ReLU stands for rectified linear unit which is defined as: max(0, x)


Before ReLU: tensor([[-1.1084, -0.1014, -0.1117,  0.2008, -0.4148, -0.0251,  0.1911,  0.2411,
         -0.0382,  0.5008, -0.2854,  0.3267,  0.2194,  0.3842,  0.0022,  0.2035,
          0.6013, -0.0603, -0.3475, -0.0478],
        [-1.1623,  0.0909,  0.2337,  0.0931, -0.3611,  0.1294,  0.1397,  0.0296,
          0.0864,  0.2243, -0.2274,  0.1507,  0.0856,  0.5075, -0.0119,  0.0729,
          0.8194, -0.4182, -0.2110, -0.0120],
        [-0.9132,  0.0228,  0.1302, -0.1744, -0.3482,  0.0229,  0.3312, -0.0728,
          0.1049,  0.3491,  0.0012,  0.0259, -0.1326,  0.2793, -0.1923,  0.1090,
          0.2682, -0.3579, -0.1183, -0.2084]], grad_fn=<AddmmBackward>)


After ReLU: tensor([[0.0000, 0.0000, 0.0000, 0.2008, 0.0000, 0.0000, 0.1911, 0.2411, 0.0000,
         0.5008, 0.0000, 0.3267, 0.2194, 0.3842, 0.0022, 0.2035, 0.6013, 0.0000,
         0.0000, 0.0000],
        [0.0000, 0.0909, 0.2337, 0.0931, 0.0000, 0.1294, 0.1397, 0.0296, 0.0864,
         0.2243, 0.0000, 0.1507, 0.0856, 0.5075, 0.000

In [42]:
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10)
)
input_image = torch.rand(3, 28, 28)
logits = seq_modules(input_image)


In [43]:
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

In [44]:
print("Model structure: ", model, "\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0131, -0.0348, -0.0063,  ...,  0.0187, -0.0143, -0.0341],
        [ 0.0042,  0.0296, -0.0127,  ..., -0.0315, -0.0051, -0.0199]],
       device='cuda:0', grad_fn=<SliceBackward>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0300,  0.0010], device='cuda:0', grad_fn=<SliceBackward>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0403,  0.0428, -0.0298,  ...,  0.0262, -0.0241,  0.0343],
        [ 0.0420,  0.0004, -0.0233,  ..., -0.0288, -0.0402,  0.0251]],
       device='cuda

In [None]:
# Now we have trained a neural network to identify certain articles from FASHIONMNIST