## ESC708 PyTorch Basics

In [None]:
import torch
import torch.nn as nn

## 1. Tensor Operations and Dimensions

In [None]:
# Valid matrix product: the inner dimensions agree, so (2x3) @ (3x4) -> (2x4).
# In general: (d x n) @ (n x f) -> (d x f)
# The @ operator is shorthand for torch.matmul
a = torch.randn(2, 3)
b = torch.randn(3, 4)
c = a @ b  # Works
c

tensor([[-0.4276,  1.2581, -1.5774, -1.2810],
        [ 0.0693, -0.7371, -0.7794,  2.2299]])

In [None]:
# Incorrect matrix multiplication: mismatched inner dimensions, (2x3) @ (4x3).
# a is (2x3) and b is (3x4) from the cell above, so b.T is (4x3):
# a has 3 columns but b.T has 4 rows.
# https://pytorch.org/docs/stable/generated/torch.matmul.html
try:
    c = torch.matmul(a, b.T)  # Raises an error
except RuntimeError as err:
    # The failure is the point of this cell: show the message without
    # stopping a "Run All" of the notebook.
    print(f"RuntimeError: {err}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x3 and 4x3)

In [None]:
# What about when some of the leading dimensions (everything except the final 2) do not match?
a = torch.randn(10, 2, 3, 4)
b = torch.randn(10, 3, 4, 5)
try:
    a @ b  # error: dimension 1 is 2 for a but 3 for b
except RuntimeError as err:
    # The failure is the point of this cell: show the message without
    # stopping a "Run All" of the notebook.
    print(f"RuntimeError: {err}")

RuntimeError: The size of tensor a (2) must match the size of tensor b (3) at non-singleton dimension 1

In [None]:
# Can we also do matmul for n-dim tensors? Yes! - but the leading (batch) dimensions
# need to match (or be broadcastable).
a = torch.randn(10, 3, 4)
b = torch.randn(10, 4, 5)  # same batch size (10) as a
c = torch.matmul(a, b)

# Each of the 10 matrix pairs is multiplied: (3x4)@(4x5)=(3x5)
c.size()  # torch.Size([10, 3, 5])

torch.Size([10, 3, 5])

In [None]:
# Does the order matter in torch.matmul()? Yes - swapping the operands changes
# which dimensions have to agree. Uses a and b from the previous cell.
try:
    c = torch.matmul(b, a)
    c.size()  # Not reached: the line above is expected to raise
except RuntimeError as err:
    # Expected error: show the message without stopping a "Run All" of the notebook.
    print(f"RuntimeError: {err}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (4x5 and 3x4)

In [None]:
# Element-wise multiplication (Hadamard product) - how does this differ from matrix multiplication?
# Here each entry is multiplied only with the entry at the same position in the other tensor.

# Both operands have the same shape (2x3)
a = torch.tensor([[1, 2, 3],
                  [4, 5, 6]])
b = torch.tensor([[7, 8, 9],
                  [10, 11, 12]])

# For tensors, the * operator performs the same operation as torch.mul
element_wise = a * b
print("Element-wise multiplication:\n", element_wise)

Element-wise multiplication:
 tensor([[ 7, 16, 27],
        [40, 55, 72]])


In [None]:
# Element-wise ops also accept tensors of different shapes, as long as the shapes are broadcastable.
# Broadcasting automatically expands the smaller tensor until both shapes are compatible.
# Broadcastable shapes here: (2x3) * (1x3)
a = torch.tensor([[1, 2, 3], [4, 5, 6]])
b = torch.tensor([[7, 8, 9]])  # Shape (1x3): its single row is reused for every row of a

element_wise_broadcast = a.mul(b)
print("Element-wise multiplication with broadcasting:\n", element_wise_broadcast)

Element-wise multiplication with broadcasting:
 tensor([[ 7, 16, 27],
        [28, 40, 54]])


In [None]:
# More broadcasting examples...
a = torch.ones(3, 1)
b = torch.zeros(3)

# After broadcasting, the sum below is equivalent to torch.ones(3,3) + torch.zeros(3,3)

# (3, 1) + (3,) -> (3, 3): a's size-1 column is repeated, b is treated as a (1, 3) row
c = a + b  # Shape: (3, 3), works
print(c)

# Different sizes, but still broadcastable (this does NOT raise): (3, 1) + (4,) -> (3, 4)
b = torch.ones(4)
# After broadcasting: a behaves like torch.ones(3,4), b like torch.ones(3,4)
c = a + b
print(c)


tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[2., 2., 2., 2.],
        [2., 2., 2., 2.],
        [2., 2., 2., 2.]])


In [None]:
# Rearranging dimensions

# Start from a 3D tensor of shape (2, 3, 4)
a = torch.randn(2, 3, 4)

# Swap dimensions 1 and 2, so the shape becomes (2, 4, 3)
a_transposed = torch.transpose(a, 1, 2)
print("Original shape:", a.shape)  # (2, 3, 4)
print("After transpose(1, 2):", a_transposed.shape)  # (2, 4, 3)

Original shape: torch.Size([2, 3, 4])
After transpose(1, 2): torch.Size([2, 4, 3])


In [None]:
a = torch.randn(2, 3, 4)
# Note the UserWarning this produces: don't use .T for anything other than 2D tensors.
# On this 3D tensor it reverses all dimensions, giving shape (4, 3, 2).
a.T

  a.T


tensor([[[ 1.0655, -0.0180],
         [ 1.7758,  0.1558],
         [ 2.2286,  0.0860]],

        [[ 2.5845, -0.3120],
         [ 0.3207,  0.7466],
         [-0.6242,  0.4851]],

        [[-0.8223, -1.1351],
         [ 0.3441, -1.4254],
         [-2.1659, -0.4221]],

        [[-0.0251,  0.1085],
         [-0.5064, -0.2434],
         [-1.0043, -0.6664]]])

In [None]:
# Is transpose() equivalent to .T? For a 2D tensor, yes - but .T should only be used on 2D tensors.

# Create a 2D tensor (matrix) of shape (2, 3)
a = torch.randn(2, 3)

# Use .T to transpose the matrix (shape becomes (3, 2))
a_transposed = a.T
print("Original shape:", a.shape)  # (2, 3)
print("After .T:", a_transposed.shape)  # (3, 2)

Original shape: torch.Size([2, 3])
After .T: torch.Size([3, 2])


In [None]:
# Reshaping a tensor

# Start with shape (2, 6), i.e. 12 values in total
a = torch.randn(2, 6)

# Lay the same 12 values out as (3, 4)
reshaped_a = torch.reshape(a, (3, 4))
print("Original shape:", a.shape)  # (2, 6)
print("Reshaped shape:", reshaped_a.shape)  # (3, 4)

Original shape: torch.Size([2, 6])
Reshaped shape: torch.Size([3, 4])


In [None]:
# What does -1 mean in reshape? It asks PyTorch to infer that dimension's size
# from the total number of elements, which is handy.

# A tensor of shape (2, 3, 4) holds 24 elements in total
a = torch.randn(2, 3, 4)
print("Original shape:", a.shape)  # (2, 3, 4)

# (6, -1): the second dimension is inferred as 24 / 6 = 4
reshaped_a = torch.reshape(a, (6, -1))
print("Reshaped to (6, -1):", reshaped_a.shape)  # (6, 4)

# (-1, 8): the first dimension is inferred as 24 / 8 = 3
reshaped_b = torch.reshape(a, (-1, 8))
print("Reshaped to (-1, 8):", reshaped_b.shape)  # (3, 8)

Original shape: torch.Size([2, 3, 4])
Reshaped to (6, -1): torch.Size([6, 4])
Reshaped to (-1, 8): torch.Size([3, 8])


In [None]:
# In place operations. Why do we need them? - Memory efficient

# Create a tensor
x = torch.tensor([1.0, 2.0, 3.0])

# In-place addition: modifies the existing tensor x
x += torch.tensor([1.0, 0.0, 3.0])
# Out-of-place equivalent: builds a new tensor and rebinds the name x to it
x = x + torch.tensor([1.0, 0.0, 3.0])
# The increment has now been applied twice: [1, 2, 3] + 2 * [1, 0, 3]
print("After in-place addition:", x)  # tensor([3., 2., 9.])

# In-place multiplication: same values as x = x * 2
x *= 2
print("After in-place multiplication:", x)  # tensor([ 6.,  4., 18.])


After in-place addition: tensor([3., 2., 9.])
After in-place multiplication: tensor([ 6.,  4., 18.])


In [None]:
# A small 2D tensor to aggregate over
x = torch.tensor([[1.0, 2.0],
                  [3.0, 4.0]])

# Without a dim argument the mean is taken over every element
mean_all = x.mean()
print("Mean of all elements:", mean_all)  # tensor(2.5000)

Mean of all elements: tensor(2.5000)


In [None]:
# dim=0 averages over the rows, leaving one mean per column
mean_dim0 = x.mean(dim=0)
print("Mean along dimension 0 (rows):", mean_dim0)  # tensor([2., 3.])

# dim=1 averages over the columns, leaving one mean per row
mean_dim1 = x.mean(dim=1)
print("Mean along dimension 1 (columns):", mean_dim1)  # tensor([1.5000, 3.5000])

Mean along dimension 0 (rows): tensor([2., 3.])
Mean along dimension 1 (columns): tensor([1.5000, 3.5000])


In [None]:
# Tensor initialisation!

# 3x3 tensor of zeros
x_zeros = torch.zeros((3, 3))

# 3x3 tensor of ones
x_ones = torch.ones((3, 3))

# 3x3 tensor of samples from a standard normal distribution
x_random = torch.randn((3, 3))

# Evenly spaced values: 0, 2, 4, 6, 8 (the end value 10 is excluded)
x_range = torch.arange(start=0, end=10, step=2)

# 3x3 tensor where every entry is the constant 7
x_full = torch.full(size=(3, 3), fill_value=7)

## 2. The model class, nn.Module()

In [None]:
# Define a simple model
class SimpleModel(nn.Module):
    """Minimal nn.Module that left-multiplies its input by a fixed random matrix."""

    def __init__(self):
        # Run nn.Module's own constructor first so PyTorch's internals are set up
        super().__init__()
        # Anything stored on self here (weights, layers, ...) is available in forward().
        # self.y is a plain (10, 1) tensor; it is not wrapped in nn.Parameter.
        self.y = torch.randn(10, 1)

    def forward(self, x):
        """Compute the output for input ``x`` as the matrix product ``self.y @ x``."""
        return self.y @ x

# Pass an input through the model. Note that this shape is compatible with self.y:
# self.y is (10, 1) and the input is (1, 8), so the inner dimensions (1 and 1) agree.
model = SimpleModel()
input_tensor = torch.randn(1, 8)  # (10, 1) @ (1, 8) -> (10, 8), no error is raised
output = model(input_tensor)
print(output.shape)

torch.Size([10, 8])


In [None]:
# What is nn.Module() ?
#  https://pytorch.org/docs/stable/generated/torch.nn.Module.html
# Base class where we can inherit lots of inbuilt pytorch methods
# Useful for neural networks, not necessary for our implementations of LogReg and LinReg


## 3. Autograd

In [None]:
# We have implemented gradient descent from scratch.
# In future we'll rely on PyTorch's built-in autograd, which computes the gradients for us.

x = torch.tensor(2.0, requires_grad=True)  # Ask autograd to track operations on x
y = x.pow(2)  # y = x^2
y.backward()  # Compute dy/dx and store it in x.grad

print(x.grad)  # dy/dx = 2*x = 4.0

tensor(4.)


In [None]:
# What about nn.Parameter() ? It registers a tensor as 'trainable'
# We can compute gradients for this tensor when performing gradient descent.

In [None]:
class CustomModel(nn.Module):
    """Model with a single trainable 3x3 weight matrix applied by right-multiplication."""

    def __init__(self):
        super().__init__()
        # Wrapping the tensor in nn.Parameter registers it as trainable,
        # so it is returned by model.parameters()
        self.weight = nn.Parameter(torch.randn(3, 3))

    def forward(self, x):
        """Return the matrix product ``x @ self.weight``."""
        return x @ self.weight

# Build an instance of the model
model = CustomModel()

# parameters() yields the tensors registered as trainable; here that is only `weight`
print("Model parameters:", [*model.parameters()])

Model parameters: [Parameter containing:
tensor([[-1.1739,  1.5373,  0.3063],
        [-0.5537, -1.2269,  0.2839],
        [-0.5081, -0.4680, -1.3514]], requires_grad=True)]


## 4. Debugging, Documentation and Error Messages

In [None]:
# Print, Print, Print !

# Two tensors whose inner dimensions agree
a = torch.randn(3, 4)
b = torch.randn(4, 5)

# Check the shapes before multiplying
print("Shape of a:", a.shape)  # Expected shape: (3, 4)
print("Shape of b:", b.shape)  # Expected shape: (4, 5)

# (3, 4) @ (4, 5) succeeds
c = a @ b
print("Shape of c:", c.shape)  # Expected shape: (3, 5)

Shape of a: torch.Size([3, 4])
Shape of b: torch.Size([4, 5])
Shape of c: torch.Size([3, 5])


In [None]:
# How to read the documentation ?

In [None]:
# help() prints the function's docstring: signatures, arguments and usage examples.
help(torch.mean)

Help on built-in function mean in module torch:

mean(...)
    mean(input, *, dtype=None) -> Tensor
    
    Returns the mean value of all elements in the :attr:`input` tensor. Input must be floating point or complex.
    
    Args:
        input (Tensor):
          the input tensor, either of floating point or complex dtype
    
    Keyword args:
        dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
            If specified, the input tensor is casted to :attr:`dtype` before the operation
            is performed. This is useful for preventing data type overflows. Default: None.
    
    Example::
    
        >>> a = torch.randn(1, 3)
        >>> a
        tensor([[ 0.2294, -0.5481,  1.3288]])
        >>> torch.mean(a)
        tensor(0.3367)
    
    .. function:: mean(input, dim, keepdim=False, *, dtype=None, out=None) -> Tensor
       :noindex:
    
    Returns the mean value of each row of the :attr:`input` tensor in the given
    dimension :att