In [1]:
import numpy as np
import torch

# Pytorch size meaning
- (3, 4, 2)
- first comes as dimention 
- second matrix rows
- third matrix columns

X =
\begin{bmatrix}
\begin{pmatrix}
1 & 0 \\
2 & 3
\end{pmatrix},
\begin{pmatrix}
4 & 1 \\
0 & 2
\end{pmatrix}
\end{bmatrix}

W =
\begin{bmatrix}
\begin{pmatrix}
1 & 2 \\
0 & 1
\end{pmatrix},
\begin{pmatrix}
-1 & 0 \\
2 & 3
\end{pmatrix}
\end{bmatrix}


In [9]:
X = [
    [
        [1, 0],
        [2, 3]
    ],
    [
        [4, 1],
        [0, 2]
    ]
]

W = [
    [
        [1,2],
        [0, 1]
    ],
    [
        [-1, 0],
        [2, 3]
    ]
]

X = np.array(X)
W = np.array(W)

print("Numpy version")
print(X.shape); print(W.shape); print(X @ W)

X = torch.tensor(X)
W = torch.tensor(W)

print("\nPytorch Tensor")
print(X@W)

Numpy version
(2, 2, 2)
(2, 2, 2)
[[[ 1  2]
  [ 2  7]]

 [[-2  3]
  [ 4  6]]]

Pytorch Tensor
tensor([[[ 1,  2],
         [ 2,  7]],

        [[-2,  3],
         [ 4,  6]]])


A 3-layer input passes through linear layer weights:
	•	Input:
x =
\begin{pmatrix}
1 & 2 & 3
\end{pmatrix}
	•	Weights:
W =
\begin{pmatrix}
1 & 0 \\
2 & 1 \\
-1 & 3
\end{pmatrix}

Compute output xW.

Then add bias:
b =
\begin{pmatrix}
1 & 2
\end{pmatrix}

Final result = xW + b

In [20]:
x = torch.tensor([1, 2, 3])
W = torch.tensor([
    [1, 0],
    [2, 1],
    [-1, 3]
])
b = torch.tensor([1, 2])

out = [
    [2,11]
]

x.shape, W.shape, W.shape == (3, 2), (1, 2), x @ W + b

(torch.Size([3]), torch.Size([3, 2]), True, (1, 2), tensor([ 3, 13]))

In [23]:
A = [
    [
        [1, 0],
        [2, 3]
    ],
    [
        [4, 1],
        [0, 2]
    ],
    [
        [4, 1],
        [0, 2]
    ],
]

B = [
    [
        [1,2],
        [0, 1]
    ],
    [
        [-1, 0],
        [2, 3]
    ]
]

print(np.array(A).shape); print(torch.tensor(A).shape)

(3, 2, 2)
torch.Size([3, 2, 2])


In [26]:
torch.rand((5, 2, 2)), torch.randint(0, 1, (7, 3, 2))

(tensor([[[0.6663, 0.3907],
          [0.7889, 0.0619]],
 
         [[0.0079, 0.2142],
          [0.8482, 0.1581]],
 
         [[0.5592, 0.3615],
          [0.7184, 0.1163]],
 
         [[0.6747, 0.1796],
          [0.8750, 0.9304]],
 
         [[0.2661, 0.1588],
          [0.9100, 0.8441]]]),
 tensor([[[0, 0],
          [0, 0],
          [0, 0]],
 
         [[0, 0],
          [0, 0],
          [0, 0]],
 
         [[0, 0],
          [0, 0],
          [0, 0]],
 
         [[0, 0],
          [0, 0],
          [0, 0]],
 
         [[0, 0],
          [0, 0],
          [0, 0]],
 
         [[0, 0],
          [0, 0],
          [0, 0]],
 
         [[0, 0],
          [0, 0],
          [0, 0]]]))

Given:
	•	Batch input X: shape (3,4)

X =
\begin{pmatrix}
1 & 2 & 3 & 4 \\
0 & 1 & -1 & 2 \\
2 & 0 & 1 & 3
\end{pmatrix}
	•	Weight matrix W: shape (4, 3)

W =
\begin{pmatrix}
1 & 0 & 2 \\
-1 & 1 & 0 \\
2 & -2 & 1 \\
0 & 3 & -1
\end{pmatrix}
	•	Bias b = (1, 2, -1)

Compute output:
Z = XW + b

Then apply ReLU manually:
\text{ReLU}(z) = \max(0, z)

In [32]:
X = np.array([
    [1, 2, 3, 4], 
    [0, 1, -1, 2],
    [2, 0, 1, 3]
])

W = np.array([
    [1, 0, 2],
    [-1,1, 0],
    [2, -2, 1],
    [0, 3, -1]
])

b = np.array([1, 2, -1])


z = torch.tensor(X) @ torch.tensor(W) + torch.tensor(b)

print(z); print(z.shape); 
# print(torch.nn.ReLU(z))

tensor([[ 6, 10,  0],
        [-2, 11, -4],
        [ 5,  9,  1]])
torch.Size([3, 3])


Given vectors:

u = (2, 3, 1), \quad v = (4, 0, -2, 1)

Compute:

u^T \cdot v

In [51]:
u = torch.tensor([2, 3, 1])
v = torch.tensor([4, 0, -2, 1])

print(u.shape); print(v.shape); print(u.T.shape)


v.view(1, -1).shape, u.T.unsqueeze(-1).shape, u.T.unsqueeze(-1) @ v.view(1, -1)

torch.Size([3])
torch.Size([4])
torch.Size([3])


(torch.Size([1, 4]),
 torch.Size([3, 1]),
 tensor([[ 8,  0, -4,  2],
         [12,  0, -6,  3],
         [ 4,  0, -2,  1]]))

✅ Level 8 — Tensor Reshape + MatMul

You have tensor T shape (2, 3, 4):

T =
\begin{bmatrix}
\begin{pmatrix}
1 & 0 & 2 & 3 \\
2 & 1 & 0 & 1 \\
1 & 1 & 1 & 1
\end{pmatrix},
\begin{pmatrix}
0 & 2 & 1 & 1 \\
3 & 1 & 2 & 0 \\
2 & 3 & 0 & 1
\end{pmatrix}
\end{bmatrix}

And weight matrix W shape (4,2):

W =
\begin{pmatrix}
1 & 0 \\
2 & 1 \\
0 & 1 \\
3 & -1
\end{pmatrix}

Tasks
	1.	Multiply using batch matrix multiply: torch.bmm(T, W.unsqueeze(0).repeat(2,1,1))
	2.	Reshape T to shape (6,4) and multiply by W

Results must match (reshaped output → reshape back to (2,3,2))

In [61]:
T = np.array([
    [
       [1, 0, 2, 3],
       [2, 1, 0, 1],
       [1, 1, 1, 1]
    ],
    [
       [0, 2, 1, 1],
       [3, 1, 2, 0],
       [2,3, 0, 1]
    ]
])


[[1, 0, 2, 3],
 [2, 1, 0, 1],
 [1, 1, 1, 1],
 [0, 2, 1, 1],
 [3, 1, 2, 0],
 [2, 3, 0, 1]]



W = np.array([
    [1, 0],
    [2, 1],
    [0, 1],
    [3, -1]
])


T = torch.tensor(T)
W = torch.tensor(W)


print(T.view(2*3, 4)) # 2 dimention becomes as 1
print((T.view(6, 4)@W).view(2, 3, 2))
print(T.shape)

print("2 version")
print((T@W));
print((T@W).shape); 
print('Unsqueezed')
print(W.unsqueeze(0))


tensor([[1, 0, 2, 3],
        [2, 1, 0, 1],
        [1, 1, 1, 1],
        [0, 2, 1, 1],
        [3, 1, 2, 0],
        [2, 3, 0, 1]])
tensor([[[10, -1],
         [ 7,  0],
         [ 6,  1]],

        [[ 7,  2],
         [ 5,  3],
         [11,  2]]])
torch.Size([2, 3, 4])
2 version
tensor([[[10, -1],
         [ 7,  0],
         [ 6,  1]],

        [[ 7,  2],
         [ 5,  3],
         [11,  2]]])
torch.Size([2, 3, 2])
Unsqueezed
tensor([[[ 1,  0],
         [ 2,  1],
         [ 0,  1],
         [ 3, -1]]])


 Level 9 — Gradient Interpretation

Let

x = \begin{pmatrix}1 & -2\end{pmatrix},\
W = \begin{pmatrix}3 & 1 \\ 2 & -1\end{pmatrix}

Compute manually:

y = xW

Then compute Jacobian \frac{\partial y}{\partial x}.

In [None]:
x = torch.tensor([1, -2], requires_grad=True, dtype=torch.float32)
W = torch.tensor([
    [3, 1],
    [2, -1]
], requires_grad=True, dtype=torch.float32)



f = (x @ W)

# derivative = dy/dx=W


print(f)
print(f.shape)
print(x.shape, W.shape); 

f.backward(torch.ones_like(f))

print('f',f)
print('x grad',x.grad)
print((1, 2), (2, 2)); 
print("out:", (1, 2))



tensor([-1.,  3.], grad_fn=<SqueezeBackward4>)
torch.Size([2])
torch.Size([2]) torch.Size([2, 2])
f tensor([-1.,  3.], grad_fn=<SqueezeBackward4>)
x grad tensor([4., 1.])
(1, 2) (2, 2)
out: (1, 2)


In [89]:
x = torch.tensor([1, -2], requires_grad=True, dtype=torch.float32)
W = torch.tensor([
    [3, 1],
    [2, -1]
], requires_grad=True, dtype=torch.float32)


x.grad = None
f = x @ W
print(f)
loss = f.sum()
print(loss)
loss.backward()
print(x.grad)

tensor([-1.,  3.], grad_fn=<SqueezeBackward4>)
tensor(2., grad_fn=<SumBackward0>)
tensor([4., 1.])
