# Build a neural network in PyTorch

In [1]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torch import nn
import numpy as np

import matplotlib.pyplot as plt

In [2]:
# Choose the compute backend in priority order: CUDA first, then Apple's
# MPS backend, and finally the CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

Using mps device


---

In [14]:
# Build NN
# 1. subclass nn.Module
# 2. implement __init__() and forward()

class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two Linear+ReLU layers, output Linear.

    Args:
        in_features: Number of values per sample once flattened.
            Defaults to ``28 * 28``.
        hidden_features: Width of both hidden layers. Defaults to ``512``.
        num_classes: Number of output logits per sample. Defaults to ``10``.

    ``NeuralNetwork()`` with no arguments builds the same
    784 -> 512 -> 512 -> 10 stack as the hard-coded version, and the
    submodule names (``flatten``, ``linear_relu_stack``) are unchanged, so
    previously saved state dicts keep the same keys.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Flatten collapses every dimension after the batch dimension.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            # No activation on the last layer: forward() returns raw logits.
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` per sample and return the un-normalized class logits."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

# Create an instance and move its parameters to the selected device
model = NeuralNetwork().to(device)

# Print the module tree
print(model)
print('------------------------')
# Alternatively, walk the tree: modules() yields the model itself first,
# then every nested submodule in turn.
for module in model.modules():
    print(module)

print('------------------------')
# Model parameters: name, shape, and the first two entries along dim 0
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
------------------------
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
Flatten(start_dim=1, end_dim=-1)
Sequential(
  (0): Linear(in_features=784, out_features=512, bias=True)
  (1): ReLU()
  (2): Linear(in_features=512, out_features=512, bias=True)
  (3): ReLU()
  (4): Linear(in_features=512, out_features=10, bias=True)
)
Linear(in_features=784, out_features=512, bias=True)
ReLU()
Linear(in_features=512,

In [12]:
# Run NN on a single random 28x28 input created directly on the target device
X = torch.rand(1, 28, 28, device=device)

# This is inference only, so skip autograd bookkeeping: under no_grad() the
# forward pass does not record a graph for the results.
with torch.no_grad():
    logits = model(X)
    # Softmax over dim=1 (the class dimension) turns logits into probabilities;
    # argmax over the same dimension picks the most likely class per sample.
    pred_probab = nn.Softmax(dim=1)(logits)
    y_pred = pred_probab.argmax(1)

print(f"Predicted class: {y_pred}")

Predicted class: tensor([1], device='mps:0')


# Autograd: computing gradients

In [26]:
# One-layer example: z = x @ w + b, scored against y with a logits-based
# binary cross-entropy loss.
x = torch.ones(5)   # input tensor
y = torch.zeros(3)  # expected output
# Only w and b are created with requires_grad=True.
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
z = x @ w + b
loss = torch.nn.functional.binary_cross_entropy_with_logits(input=z, target=y)

# Backpropagate from the loss; this can only be done once on this graph
# (for performance reasons).
loss.backward()
print(w.grad, w.requires_grad)
print(b.grad)

# Neither x nor z ends up with a populated .grad: x never asked for gradients,
# and z is an intermediate (non-leaf) result. Reading z.grad may also make
# PyTorch emit a warning about accessing .grad on a non-leaf tensor.
print(x.grad)
print(z.grad, z.requires_grad)

# Leaf check: x and w were created directly, z was produced by an operation.
print(*(tensor.is_leaf for tensor in (x, w, z)))

tensor([[0.0212, 0.0324, 0.2545],
        [0.0212, 0.0324, 0.2545],
        [0.0212, 0.0324, 0.2545],
        [0.0212, 0.0324, 0.2545],
        [0.0212, 0.0324, 0.2545]]) True
tensor([0.0212, 0.0324, 0.2545])
None
None True
True True False


  print(z.grad, z.requires_grad)


# Save and Load model

In [None]:
# Save model: persist only the parameters (the state dict), not the object
torch.save(model.state_dict(), "model.pth")

# Initialize a fresh instance of the model class on the target device
model = NeuralNetwork().to(device)
# Load parameters.
# - map_location=device remaps the stored tensors onto the current device, so
#   the checkpoint still loads on a machine with a different accelerator.
# - weights_only=True restricts unpickling to tensors and plain containers,
#   which avoids executing arbitrary pickled code from a checkpoint file.
model.load_state_dict(
    torch.load("model.pth", map_location=device, weights_only=True)
)