In [1]:
import torch
import torch.nn as nn

device = "mps" if torch.backends.mps.is_available() else "cpu"

In [2]:
'''
Basic arithmetic with broadcasting.
Create a tensor A of shape (3, 1) and a tensor B of shape (1, 4). 
Perform element-wise addition and element-wise multiplication on A and B. 
Explain how broadcasting allows this operation to be performed.
'''

A = torch.rand(size=(3,1)).to(device)
B = torch.rand(size=(1,4)).to(device)

n = A.shape[0]
m = B.shape[1]

C = torch.zeros(size=(n,m)).to(device)
for i in range(n):
    for j in range(m):
        C[i][j] = A[i][0] + B[0][j]

print(C)


tensor([[1.2390, 0.8470, 0.6567, 0.6765],
        [1.7383, 1.3463, 1.1559, 1.1758],
        [0.8531, 0.4611, 0.2707, 0.2906]], device='mps:0')


In [5]:
C_= torch.broadcast_shapes((3,1),(1,4)) # C gets broadcasted shape of (3,4)
C_ = A+B
assert(torch.sum(C_ == C) == C.shape[0]*C.shape[1]) 
# Number of equal elements in both the tensors should be equal to the tot number of tensors

### Broadcasting:
- In linear algebra, addition, subtraction, multiplication, and division of matrices (or tensors) require them to have the exact same shape (element-wise operations).
- Broadcasting is a mechanism that relaxes this constraint. When two tensors have different shapes, PyTorch (and NumPy) attempts to align them by "stretching" the smaller tensor along its dimension(s) so that the resulting shapes are compatible. 



### Rule of Broadcasting:
- Each tensor has at least one dimension.
- When iterating over the dimension sizes, starting at the trailing dimension, the dimension sizes must either be equal, one of them is 1, or one of them does not exist.
- If a dimension is missing, always pad in the leading dimensions. Eg. (4,) -> (1,4) 

Eg. A = (3,1,2), B = (2,)
These 2 are broadcastable because:

```
A = (3,1,2)
B = (1,1,2)
```

And hence the result of the broadcasted tensor is (3,1,2)

Eg. 

```
A = (4, 1, 6, 1)
B = (1, 5, 1, 8)
```

C = A + B <br>
C = (4,5,6,8)

In [12]:
# Write a function `is_broadcastable(shape1, shape2)` 
# that returns True/False if the two shapes can be broadcast together
# according to PyTorch rules.
#
# Example:
# is_broadcastable((4,1,3), (1,5,1)) ➜ True
# is_broadcastable((4,2), (3,)) ➜ False

def is_broadcastable(A: torch.Tensor, B:torch.tensor)-> bool:
    shape1 = A.shape
    shape2 = B.shape

    if len(shape1) > len(shape2):
        while(len(shape1) != len(shape2)):
            B = torch.unsqueeze(B, dim=0)
            shape2 = B.shape
    else:
        while(len(shape1) != len(shape2)):
            A = torch.unsqueeze(A, dim=0)
            shape1 = A.shape
    
    assert(len(shape1) == len(shape2))
    print(f"Shape of A : {A.shape} \nShape of B : {B.shape}")
    
    n = len(shape1)
    
    for i in range(n):
        if ((shape1[n-i-1] != shape2[n-i-2]) and (shape1[n-i-1] != 1) and (shape2[n-i-1] != 1)):
            return False
    
    return True


In [13]:
A = torch.rand(size=(4,1,3))
B = torch.rand(size=(1,5,1))
is_broadcastable(A,B)

Shape of A : torch.Size([4, 1, 3]) 
Shape of B : torch.Size([1, 5, 1])


True

In [45]:
# Create a tensor of shape (3,1)
# Expand it to (3,5) using .expand()
# Verify that .expand() doesn’t allocate new memory (use a.is_shared_storage(b))

a = torch.rand(size=(3,1)).to("mps")
b = torch.expand_copy(a,size=(3,5)).to("mps")
print(a.data_ptr())
print(b.data_ptr())

4631818272
5144374112


In [35]:
# Given x = torch.rand(3,1,5) and y = torch.rand(1,4,1)
# Perform x + y and print shape.
# Confirm with manual reasoning.
x = torch.rand(3,1,5)
y = torch.rand(1,4,1)

z = x+y
print(z.shape)
z_ = torch.broadcast_shapes(x.shape,y.shape)
# assert(z.shape == z_.shape)
assert(z.shape == z_)

torch.Size([3, 4, 5])


In [43]:
# Given a tensor X of shape (batch_size, num_features)
# Subtract mean of each feature and divide by its std, 
# using broadcasting (without explicit loops).
# X_norm = (X - X.mean(dim=0)) / X.std(dim=0)

batch_size = 5
num_features = 10
X = torch.rand(size=(batch_size,num_features))
mean = torch.mean(X,dim=0) # mean along every feature and therefore there will be 10 different means
std = torch.std(X,dim=0)
X = (X-mean)/std

print(f"Normalised Data :{X}")

Normalised Data :tensor([[-1.4327e+00,  1.5524e-01, -6.3545e-01,  2.5937e-01,  9.4289e-01,
         -6.0890e-01,  3.8253e-01,  1.1252e+00,  4.2250e-01,  7.5454e-04],
        [-5.5106e-01,  1.5899e+00,  8.7424e-01, -1.0110e+00,  1.1499e+00,
          1.5305e+00,  1.2008e+00,  4.8330e-01, -6.6470e-01, -1.0054e+00],
        [ 1.0955e+00, -5.7064e-01, -1.3766e+00, -8.7428e-01, -2.5748e-01,
         -1.1187e+00, -1.4135e+00, -1.4218e+00, -1.2974e+00,  3.1168e-01],
        [ 2.8755e-01, -1.0524e+00,  1.8848e-01,  1.4550e+00, -7.7960e-01,
          1.8778e-01,  3.6193e-01, -5.7360e-01,  1.2754e+00, -8.0501e-01],
        [ 6.0072e-01, -1.2217e-01,  9.4936e-01,  1.7089e-01, -1.0557e+00,
          9.2866e-03, -5.3169e-01,  3.8696e-01,  2.6426e-01,  1.4980e+00]])


In [None]:
# Given two tensors A and B of shapes (N, D) and (M, D),
# compute the pairwise Euclidean distance matrix of shape (N, M)
# using broadcasting (no loops).
#
N = 10
D = 20
M = 30

A = torch.rand(size=(N,D))
B = torch.rand(size=(M,D))
DIST = torch.zeros(size=(N,M))

DIST = 


RuntimeError: The size of tensor a (10) must match the size of tensor b (30) at non-singleton dimension 0

In [57]:
b = torch.rand((3,)).to(device)
X = torch.rand((32,3,28,28)).to(device)
b = b.unsqueeze(1).unsqueeze(1)
print(b.shape)

# c = torch.broadcast_shapes(b.shape,X.shape)
y = X+b
print(y.shape)


torch.Size([3, 1, 1])
torch.Size([32, 3, 28, 28])


In [None]:
# Given an input tensor x of shape (batch, channels, height, width)
# and a bias tensor b of shape (channels,),
# add the bias to each channel using broadcasting.

X = torch.rand((32,3,28,28)).to(device)

# One method of doing this:
'''
b = torch.rand((3,)).to(device)

b = b.unsqueeze(1)
b = b.unsqueeze(2).unsqueeze(2)
'''

# Other method of doing this:
'''
b = b.unsqueeze(1).unsqueeze(1)
print(b.shape)
'''

y = X+b
print(X.shape)
print(b.shape)
print(y.shape)



torch.Size([32, 3, 28, 28])
torch.Size([1, 3, 1, 1])
torch.Size([32, 3, 28, 28])


In [None]:
# Given vectors a (shape (3,)) and b (shape (4,)), 
# compute their outer product (3x4) using broadcasting.
a = torch.rand((4,))
b = torch.rand((3,))
dot_ = 

RuntimeError: inconsistent tensor size, expected tensor [4] and src [3] to have the same number of elements, but got 4 and 3 elements respectively

In [None]:
# Implement a function that normalizes tensor x 
# such that softmax is applied over the last dimension:
# def softmax(x):
#     exp_x = torch.exp(x - x.max(dim=-1, keepdim=True).values)
#     return exp_x / exp_x.sum(dim=-1, keepdim=True)
#
# Understand how broadcasting ensures correct denominator division.


In [None]:
# Given A, B of shape (batch, dim), compute cosine similarity per batch:
# cosine_sim = (A * B).sum(dim=1) / (A.norm(dim=1) * B.norm(dim=1))
# Identify all broadcasted operations.


In [None]:
# Given an image batch x of shape (N, C, H, W)
# Normalize each channel using channel-wise mean and std (both of shape (C,))
# Use broadcasting to implement this.


In [None]:
# Given logits of shape (batch, num_classes)
# and labels as class indices of shape (batch,)
# implement one-hot encoding manually and compute MSE loss using broadcasting.


In [None]:
# Create a tensor t of shape (10,)
# Reshape and expand it to (10, 5, 1, 8) using view/unsqueeze/expand combo.


In [None]:
# x = torch.rand(5,4,3)
# y = torch.rand(4,1)
# x + y  # Throws error.
# Fix it using view/unsqueeze so broadcasting works.


In [None]:
# Create a tensor image (1, 1, 5, 5) and kernel (1, 1, 3, 3)
# Use unfold() + broadcasting to manually perform 2D convolution
# (no nn.Conv2d, pure tensor ops).


In [None]:
# Implement your own function broadcast_add(a, b) that adds two tensors manually by:
# Expanding singleton dimensions yourself
# Using .expand() or .repeat()
# Without using a + b directly.