Build the Neural Network
===

In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

## Get Device for Training

In [2]:
# Run on the GPU when CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')

Using cuda device


## Define NN Class

In [7]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two Linear+ReLU layers, output Linear.

    Args:
        in_features: Number of values per sample once flattened
            (default ``28 * 28``).
        hidden_features: Width of both hidden layers (default ``512``).
        num_classes: Number of logits produced per sample (default ``10``).

    With the default arguments the layer sizes and the submodule names
    (``flatten``, ``linear_relu_stack``) are identical to the original
    hard-coded network, so ``NeuralNetwork()`` and existing state-dict keys
    keep working.
    """

    def __init__(self, in_features: int = 28 * 28, hidden_features: int = 512,
                 num_classes: int = 10):
        super().__init__()
        # Merges every dimension after the first (batch) one into a single axis.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            # No activation after the last layer: forward() returns raw logits.
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` per sample and return the unnormalised class scores.

        Args:
            x: Tensor whose non-batch dimensions multiply to ``in_features``.

        Returns:
            Tensor of logits with ``num_classes`` values per sample.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [8]:
# Build the network, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [12]:
# Feed one random 28x28 input (batch of 1), created directly on `device`.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
# Normalise the logits along dim 1, then pick the index of the largest value.
predict_proba = nn.functional.softmax(logits, dim=1)
y_pred = torch.argmax(predict_proba, dim=1)
print(f'Predicted class: {y_pred}')

Predicted class: tensor([9], device='cuda:0')


## Model Layers

In [13]:
# A random stand-in for a mini-batch of three 28x28 images.
input_img = torch.rand((3, 28, 28))
print(input_img.shape)

torch.Size([3, 28, 28])


### nn.Flatten

In [14]:
# Keep dim 0 and merge the remaining dims: (3, 28, 28) -> (3, 784).
# start_dim=1 / end_dim=-1 are nn.Flatten's defaults, spelled out here.
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_img)
print(flat_image.shape)

torch.Size([3, 784])


### nn.Linear

In [15]:
# Linear map from the 784 flattened values down to 20 hidden features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


### nn.ReLU

In [16]:
# Print the values before the activation so the effect of ReLU is visible.
print(f'Before ReLU: {hidden1}\n\n')
# NOTE: this rebinds `hidden1` to the activated tensor. Re-running this cell
# on its own therefore prints already-activated values under "Before ReLU";
# re-run the cell that computes `hidden1 = layer1(flat_image)` first.
hidden1 = nn.ReLU()(hidden1)
print(f'After ReLU: {hidden1}')

Before ReLU: tensor([[-4.0206e-03, -1.8420e-01,  4.2585e-01, -7.9712e-02, -4.7517e-02,
          9.5549e-02,  5.7821e-02,  6.1336e-01,  1.0414e-02,  1.4349e-01,
         -4.7457e-01,  3.7808e-01,  2.8153e-01,  9.0531e-02, -3.2141e-01,
         -1.8695e-01, -5.7941e-01, -2.2746e-01, -4.8874e-02,  3.1237e-01],
        [-1.8316e-01, -1.2686e-01,  1.0849e-01,  3.0655e-01, -1.0222e-01,
         -4.7171e-02,  1.7246e-01,  1.2512e-01,  9.4444e-02,  3.4892e-02,
         -1.9126e-01,  7.9658e-02,  1.3633e-01, -1.5931e-01, -3.9814e-02,
          7.6603e-02, -5.2350e-01, -8.2148e-02,  6.1319e-02,  1.1769e-01],
        [-4.0036e-01, -1.3306e-02,  5.0517e-01, -1.5739e-01, -1.9223e-02,
         -1.2973e-01,  3.3364e-01, -3.1295e-04,  2.0170e-02,  1.6985e-01,
         -6.8893e-01,  3.2545e-01,  2.5084e-01,  2.5092e-01, -2.0343e-02,
         -2.3845e-01, -4.0627e-01, -9.9635e-02,  2.4819e-01,  2.3495e-01]],
       grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.0000, 0.4258, 0.0000, 0.0000,

### nn.Sequential

In [17]:
# Chain the already-built `flatten` and `layer1` with a fresh activation and
# a 20 -> 10 output layer; modules run in the order they are listed.
seq_modules = nn.Sequential(
    flatten, layer1, nn.ReLU(), nn.Linear(in_features=20, out_features=10)
)
# A new random batch (separate from `input_img`) to push through the stack.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

### nn.Softmax

In [18]:
# Softmax along dim=1 rescales the values of each row so that they sum to 1.
# For the `logits` produced by `seq_modules` above, dim 1 holds the 10 outputs
# of the final nn.Linear(20, 10) layer.
softmax = nn.Softmax(dim=1)
predict_proba = softmax(logits)

## Model Parameters

In [20]:
# Show the module tree of `model`, followed by blank lines as a separator.
print('Model structure: ', model, '\n\n')

# named_parameters() yields (name, parameter) pairs; the names are dotted
# paths such as "linear_relu_stack.0.weight".
for name, param in model.named_parameters():
    # param[:2] keeps only the first two entries along dim 0 for a short preview.
    print(f'Layer: {name} | Size: {param.size()} | Values: {param[:2]} \n')

Model structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[-0.0019,  0.0257,  0.0292,  ...,  0.0084, -0.0312, -0.0141],
        [ 0.0097,  0.0191, -0.0048,  ...,  0.0205,  0.0261,  0.0079]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: tensor([-0.0341,  0.0235], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[-0.0243,  0.0038, -0.0333,  ..., -0.0270,  0.0172, -0.0210],
        [ 0.0210,  0.0085,  0.0249,  ...,  0.0319,  0.0217, -0.0313]],
       device='cuda:0', grad_fn=<Sli