In [1]:
import torch
import numpy as np

# Basic tensor concept

In [2]:
tensor1 = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32);
tensor1.dtype

torch.float32

In [3]:
tensor1.shape # shape and .size() creates the same output
tensor1.numel() # count the number of element in tensor

4

In [4]:
# creating tensor from numpy array
arr1 = np.array([[1, 2], [3, 4]])
tensor2 = torch.from_numpy(arr1).to(torch.float32) # because of to() is not (not in-place) function
tensor2

tensor([[1., 2.],
        [3., 4.]])

In [5]:
torch.allclose(tensor1, tensor2)

True

In [6]:
# convert tensor to numpy array
tensor2.numpy().astype(np.float16)

array([[1., 2.],
       [3., 4.]], dtype=float16)

# torch.autograd
- Autograd is Pytorch's automatic differentiation sublibrary or engine, included in pytorch

In [7]:
from torchvision.models import resnet18, ResNet18_Weights
model = resnet18(weights=ResNet18_Weights.DEFAULT)
print(model)
data = torch.rand(1, 3, 64, 64)
labels = torch.rand(1, 1000)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\sofia/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 44.7M/44.7M [00:01<00:00, 36.7MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [8]:
prediction = model(data)
prediction.shape

torch.Size([1, 1000])

In [9]:
# use MAE as cost function 
loss = torch.abs(prediction - labels)
cost = torch.div(loss.sum(), prediction.numel())
type(cost)
cost.backward(retain_graph=True)

In [10]:
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

In [11]:
optim.step()

## Note: Math symbol
$\cdot$: is the symbol for dot product

$\odot$: is the symbol for hadamard product


# Neural networks

## Define the network

In [12]:
import torch.nn as nn
import torch.nn.functional as F

In [13]:
# Implementing LeNet
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        
        # Define input channel, output channel, and kernel size
        self.conv1 = nn.Conv2d(1, 6, (5, 5))
        self.conv2 = nn.Conv2d(6, 16, (5, 5))
        
        # Define 3 layers fully connected 
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, input):
        # Let's build the forward probagation of LeNet
        # c_n is the notation for the output layer which applied 
        # convolution operation.
        # s_n is the notation for the output layer which applied 
        # subsampling technique with max-pooling operation.
        c1 = F.relu(self.conv1(input))
        s1 = F.avg_pool2d(c1, (2, 2))
        c2 = F.relu(self.conv2(s1))
        s2 = F.avg_pool2d(c2, (2, 2))
        flatten = torch.flatten(s2)
        f1 = F.relu(self.fc1(flatten))
        f2 = F.relu(self.fc2(f1))
        output = F.softmax(self.fc3(f2))
        return output

lenet = LeNet()
print(lenet)

LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [14]:
# Show the information of parameter size in model
params = list(lenet.parameters())
print(f"Number of param tensors in model {len(params)}")
print(f"Number of params in model {sum(param.numel() for param in params)}")

for i in lenet.parameters():
    print(i.shape)

Number of param tensors in model 10
Number of params in model 61706
torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 400])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [17]:
# Pass the random output to model
data = torch.randn(1, 32, 32)
output = lenet(data)
print(output)

tensor([0.0963, 0.1006, 0.1092, 0.0975, 0.0997, 0.1017, 0.1083, 0.1048, 0.0917,
        0.0902], grad_fn=<SoftmaxBackward0>)


  output = F.softmax(self.fc3(f2))


# Loss function

In [21]:
def seed_everything(seed=42):
    import torch
    import numpy as np
    import random
    # Set the seed for PyTorch
    torch.manual_seed(seed)
    
    # If you are using CUDA
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    
    # Set the seed for NumPy
    np.random.seed(seed)
    
    # Set the seed for Python's built-in random module
    random.seed(seed)
    
    # Set deterministic behavior for CUDA
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Call the function to seed everything
seed_everything(42)

In [24]:
target = torch.randn_like(output)
criterion = nn.MSELoss()

loss = criterion(target, output)
print(loss)

tensor(1.0233, grad_fn=<MseLossBackward0>)


# Backprop

In [29]:
lenet.zero_grad() # set gradient buffers as zeroes
print(lenet.conv1.bias.grad)
loss.backward()
print(lenet.conv1.bias.grad)

None


RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.