# Build the neural network

In [2]:
import os
import torch 
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

## 1. Get device for training

In [4]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [6]:
# Report which device string was selected ('cuda' or 'cpu').
print('Using {} device'.format(device))

Using cuda device


In [34]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two hidden ReLU layers, linear output.

    With the default arguments this is the 784 -> 512 -> 512 -> 10 network for
    28x28 inputs. Submodule names and positions inside ``linear_relu_stack``
    are fixed, so ``state_dict`` keys (``linear_relu_stack.0.weight``, ...)
    do not depend on the sizes chosen.

    Args:
        in_features: Number of values per sample once the input is flattened.
        hidden_features: Width of each of the two hidden layers.
        num_classes: Number of output logits (one per class).
    """

    def __init__(self, in_features: int = 28 * 28, hidden_features: int = 512,
                 num_classes: int = 10):
        super().__init__()
        # Collapse every dimension after the batch dimension into one vector
        # (for a 28x28 input that is 784 values per sample).
        self.flatten = nn.Flatten()
        # Ordered container of modules: they are applied in the order listed.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),  # introduce non-linearity
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),  # introduce non-linearity
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the raw class scores (logits) for a batch ``x``.

        ``x`` is flattened from dimension 1 onwards, so its trailing dimensions
        must multiply to ``in_features``. The result has shape
        ``(batch, num_classes)`` and is not normalised; apply softmax to get
        probabilities.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [35]:
# Build the network, then move its parameters to the selected device
# (Module.to returns the module itself, so `model` is the same object).
model = NeuralNetwork()
model = model.to(device)

In [45]:
# One random 28x28 "image" (batch of 1), created directly on the model's device.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
# Softmax along dim 1 (the 10 class scores) turns the logits into probabilities.
pred_prob = torch.softmax(logits, dim=1)
# The predicted class is the index of the largest probability in each row.
y_pred = torch.argmax(pred_prob, dim=1)

In [46]:
# Index of the highest-probability class for the single random sample.
y_pred

tensor([6], device='cuda:0')

In [47]:
# Raw model outputs, before softmax is applied.
logits

tensor([[ 0.0423,  0.0350, -0.0435,  0.0718, -0.0588, -0.0545,  0.0742, -0.0573,
         -0.0729,  0.0156]], device='cuda:0', grad_fn=<AddmmBackward>)

In [48]:
# Softmax of the logits along dim 1: one probability per class.
pred_prob

tensor([[0.1047, 0.1039, 0.0961, 0.1078, 0.0946, 0.0950, 0.1081, 0.0947, 0.0933,
         0.1019]], device='cuda:0', grad_fn=<SoftmaxBackward>)

In [49]:
# Printing the module shows its registered submodules and their settings.
print("Model structure: ", model)

Model structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [52]:
# Walk over every learnable tensor and show its qualified name, its full
# shape, and a preview of its first two rows (or entries, for biases).
for name, param in model.named_parameters():
    print(
        'layer : {} | size : {} | values {}'.format(
            name,
            param.shape,
            param[:2],
        )
    )

layer : linear_relu_stack.0.weight | size : torch.Size([512, 784]) | values tensor([[ 0.0242,  0.0342,  0.0293,  ..., -0.0103, -0.0079,  0.0133],
        [-0.0317, -0.0150, -0.0143,  ...,  0.0347,  0.0227,  0.0053]],
       device='cuda:0', grad_fn=<SliceBackward>)
layer : linear_relu_stack.0.bias | size : torch.Size([512]) | values tensor([-0.0036,  0.0103], device='cuda:0', grad_fn=<SliceBackward>)
layer : linear_relu_stack.2.weight | size : torch.Size([512, 512]) | values tensor([[-0.0235,  0.0161, -0.0279,  ...,  0.0176,  0.0213, -0.0114],
        [ 0.0122,  0.0305, -0.0369,  ...,  0.0375, -0.0067,  0.0272]],
       device='cuda:0', grad_fn=<SliceBackward>)
layer : linear_relu_stack.2.bias | size : torch.Size([512]) | values tensor([0.0340, 0.0047], device='cuda:0', grad_fn=<SliceBackward>)
layer : linear_relu_stack.4.weight | size : torch.Size([10, 512]) | values tensor([[-0.0246,  0.0337,  0.0405,  ...,  0.0278,  0.0405,  0.0202],
        [-0.0355, -0.0214, -0.0134,  ...,  0.0190