In [1]:
import torch
import numpy as np

In [2]:
# 1-D tensor holding the three integers 1, 2, 3.
tensor = torch.tensor([1, 2, 3])

In [3]:
# `*` multiplies element by element: [1*1, 2*2, 3*3].
tensor * tensor

tensor([1, 4, 9])

In [4]:
# 3x3 integer matrix holding 1..9; it replaces the 1-D `tensor` defined earlier.
tensor = torch.tensor(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
)

In [5]:
# On a 2-D tensor `*` is still element-wise: every entry is multiplied by itself.
tensor * tensor

tensor([[ 1,  4,  9],
        [16, 25, 36],
        [49, 64, 81]])

In [6]:
# The `*` operator above is element-wise multiplication, not matrix multiplication.
# When we talk about matrix multiplication we mean the operation shown below.

In [7]:
# `@` is the matrix-multiplication operator: each output entry is a row of the
# left operand dotted with a column of the right operand (e.g. 1*1 + 2*4 + 3*7 = 30).
tensor @ tensor

tensor([[ 30,  36,  42],
        [ 66,  81,  96],
        [102, 126, 150]])

In [8]:
# torch.matmul is the function form of matrix multiplication; the result is the
# same as with the `@` operator.
torch.matmul(tensor, tensor)

tensor([[ 30,  36,  42],
        [ 66,  81,  96],
        [102, 126, 150]])

In [9]:
# For matrix multiplication the shapes must be compatible:
# (rows1 x cols1) @ (rows2 x cols2)
# requires cols1 == rows2, i.e. the inner dimensions must match,
# and the result has shape (rows1 x cols2).
# Let's see that in action.

In [10]:
# Two int32 matrices with the same 3x2 shape, used to show what happens when the
# inner dimensions of a matrix product do not line up.
tensor1 = torch.tensor(
    [[1, 2], [3, 4], [5, 6]],
    dtype=torch.int32,
)
tensor2 = torch.tensor(
    [[7, 8], [9, 10], [11, 12]],
    dtype=torch.int32,
)

In [11]:
# tensor1 has 3 rows and 2 columns.
tensor1.shape

torch.Size([3, 2])

In [12]:
# tensor2 is also 3x2, so its row count (3) differs from tensor1's column count (2).
tensor2.shape

torch.Size([3, 2])

In [13]:
# tensor1 has 2 columns but tensor2 has 3 rows (2 != 3), so multiplying them raises an error.

In [14]:
# Intentionally invalid product: both operands are 3x2, so the inner dimensions
# (2 and 3) do not match. The error is caught and printed so the demonstration
# stays visible while "Restart Kernel -> Run All" can still reach the cells below.
try:
    tensor1 @ tensor2
except RuntimeError as err:
    # Same "ExceptionName: message" form an uncaught error would end with.
    print(f"{type(err).__name__}: {err}")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [15]:
# If it makes sense for the data, transposing the second matrix makes the shapes compatible:

In [16]:
# .T gives the transpose: rows become columns, so the 3x2 matrix is shown as 2x3.
tensor2.T

tensor([[ 7,  9, 11],
        [ 8, 10, 12]], dtype=torch.int32)

In [17]:
# tensor2 itself is still 3x2: evaluating .T in the previous cell did not change it.
tensor2

tensor([[ 7,  8],
        [ 9, 10],
        [11, 12]], dtype=torch.int32)

In [18]:
# Rebind tensor2 to its transpose so that its row count matches tensor1's column
# count, which is what `tensor1 @ tensor2` requires.
# The guard keeps this cell idempotent: running it a second time does not flip
# tensor2 back to the incompatible 3x2 orientation.
if tensor1.shape[1] != tensor2.shape[0]:
    tensor2 = tensor2.T

In [19]:
# After the reassignment, tensor2 holds the transposed 2x3 matrix.
tensor2

tensor([[ 7,  9, 11],
        [ 8, 10, 12]], dtype=torch.int32)

In [20]:
# (3x2) @ (2x3): the inner dimensions now match, so the product is defined.
# First entry: 1*7 + 2*8 = 23.
tensor1 @ tensor2

tensor([[ 23,  29,  35],
        [ 53,  67,  81],
        [ 83, 105, 127]], dtype=torch.int32)

In [21]:
# Show both operand shapes side by side as a tuple.
tensor1.shape, tensor2.shape

(torch.Size([3, 2]), torch.Size([2, 3]))

In [22]:
# The inner dimensions match (2 == 2), so the product works and the result is 3 by 3.

In [23]:
# Let's look at an interesting case: a single row times a single column.

In [24]:
# The double brackets make A a 2-D tensor with one row and three columns.
A = torch.tensor([[1.0, 2.0, 3.0]])

In [25]:
# 1 row, 3 columns.
A.shape

torch.Size([1, 3])

In [26]:
# ndim is the number of dimensions; it is 2 here because the data is nested two brackets deep.
A.ndim

2

In [27]:
# Column counterpart of A: three rows with a single value each (shape 3x1).
B = torch.tensor([[4.0], [5.0], [6.0]])

In [28]:
# 3 rows, 1 column.
B.shape

torch.Size([3, 1])

In [29]:
# Still 2 dimensions, even though there is only one column.
B.ndim

2

In [30]:
# (1x3) @ (3x1) -> 1x1: the single entry is 1*4 + 2*5 + 3*6 = 32.
A @ B

tensor([[32.]])

In [31]:
# Shapes [1, 3] and [3, 1]:
# the inner dimensions match (3 == 3), so it works and the result is 1 by 1.

In [32]:
# Why is matrix multiplication at the heart of almost everything in deep learning?

In [33]:
# Fix the RNG seed before the layer is created so nn.Linear draws the same
# initial weights on every run.
torch.manual_seed(42)

# --- Step 1: toy dataset ------------------------------------------------------
# One row per student, one column per feature:
#   [study_hours, sleep_hours, social_media_hours]
X = torch.tensor(
    [
        [5.0, 7.0, 2.0],  # Student A: studies 5h, sleeps 7h, social media 2h
        [3.0, 5.0, 5.0],  # Student B
        [8.0, 6.0, 1.0],  # Student C
    ]
)  # shape: (3, 3)
print("Input shape:", X.shape, "\n")

# --- Step 2: linear model that "predicts" a grade -----------------------------
# Three input features, one output value per student. The weights are random and
# untrained, so the numbers are meaningless; this is only a demonstration.
model = torch.nn.Linear(in_features=3, out_features=1)
predicted_grades = model(X)
# An activation function usually follows a layer like this. Here every prediction
# is positive, so ReLU returns the same values (try it in step 4).
print("Predicted grades (from nn.Linear):\n", predicted_grades)
print("Output shape:", predicted_grades.shape, "\n")

# --- Step 3: what the Linear layer really does --------------------------------
# grade = X @ W^T + b
W, b = model.weight, model.bias  # shapes: (1, 3) and (1,)
manual_output = torch.matmul(X, W.T) + b
print("Manual matrix multiplication output:\n", manual_output)
outputs_match = torch.allclose(predicted_grades, manual_output)
print("Matches nn.Linear output:", outputs_match, "\n")

# --- Step 4: add a simple "activation" ----------------------------------------
relu_output = predicted_grades.relu()
print("After ReLU (forcing positive values):\n", relu_output)

Input shape: torch.Size([3, 3]) 

Predicted grades (from nn.Linear):
 tensor([[5.8213],
        [3.5743],
        [6.8016]], grad_fn=<AddmmBackward0>)
Output shape: torch.Size([3, 1]) 

Manual matrix multiplication output:
 tensor([[5.8213],
        [3.5743],
        [6.8016]], grad_fn=<AddBackward0>)
Matches nn.Linear output: True 

After ReLU (forcing positive values):
 tensor([[5.8213],
        [3.5743],
        [6.8016]], grad_fn=<ReluBackward0>)
