# Einsum Exercises Notebook
Progressive exercises (Beginner → Transformers).

In [1]:
import numpy as np
import torch
np.set_printoptions(suppress=True)

## Level 1 — Basic

### Exercise 1: Sum all elements

In [2]:
A = np.random.randint(1,10,(3,4))
A

array([[9, 7, 7, 5],
       [8, 2, 1, 9],
       [2, 8, 8, 1]])

In [3]:
# Your answer
np.einsum("ij->", A).item()

###############################
#GPT's answer
###############################
#np.einsum("ij->", A) #correct

67

### Exercise 2: Row-wise sum

In [4]:
# Your answer
#np.einsum("i->i",A) #wrong

###############################
#GPT's answer
###############################
np.einsum("ij->i", A) #wrong

array([28, 20, 19])

### Exercise 3: Column-wise sum

In [5]:
# Your answer
np.einsum("ij->j",A)

###############################
#GPT's answer
###############################
#np.einsum("ij->j",A) #correct

array([19, 17, 16, 15])

### Exercise 4: Transpose

In [6]:
# Your answer
np.einsum("ij->ji", A) #correct

array([[9, 8, 2],
       [7, 2, 8],
       [7, 1, 8],
       [5, 9, 1]])

### Exercise 5: Dot product

In [7]:
a=np.array([2,3,4]); b=np.array([5,6,7])

In [8]:
# Your answer
np.einsum("i,i->",a,b)

np.int64(56)

## Level 2 — Intermediate

### Exercise 6: Matmul

In [9]:
A=np.random.randint(1,5,(3,4)); B=np.random.randint(1,5,(4,5))

In [10]:
# Your answer
np.einsum("ij,jk->ik",A,B) #correct

array([[33, 28, 41, 27, 48],
       [27, 24, 36, 24, 42],
       [21, 18, 31, 19, 34]])

### Exercise 7: Outer product

In [11]:
# Your answer


### Exercise 8: Hadamard product

In [20]:
#exercises
#1
A = np.random.randint(1, 5, (3, 4))
B = np.random.randint(1, 5, (3, 4))

#2
A2 = np.random.randint(1, 5, (3, 4))
B2 = np.random.randint(1, 5, (4,))

#3
A3 = np.random.randint(1, 5, (3, 4))
C3 = np.random.randint(1, 5, (3, 1))
C3.shape


(3, 1)

In [23]:
# Your answer

#1
print(A)
print(B)
np.einsum("ij,ij->ij", A,B) #correct

#2
print(A2)
print(B2)
np.einsum("ij,j->ij",A2,B2) #correct

#3
print(A3)
print(C3)
np.einsum("ij,ik->ij",A3,C3) #wrong


[[1 2 2 4]
 [2 3 3 4]
 [2 4 1 1]]
[[3 2 2 4]
 [3 1 2 2]
 [4 4 1 3]]
[[1 1 1 2]
 [2 1 2 2]
 [4 4 2 2]]
[1 1 1 4]
[[1 4 3 3]
 [2 2 3 3]
 [2 4 3 2]]
[[3]
 [3]
 [1]]


array([[ 3, 12,  9,  9],
       [ 6,  6,  9,  9],
       [ 2,  4,  3,  2]])

### Exercise 9: Batched matmul

In [24]:
A=np.random.randn(5,3,4); B=np.random.randn(5,4,6)

In [25]:
# Your answer
np.einsum("bij,bjk->bik",A,B)

array([[[ 3.15183188,  0.61224916, -0.64457349, -0.1431397 ,
         -0.29703603,  2.19482262],
        [ 0.89868247,  0.03987541,  3.53382944,  1.74633631,
         -1.18822017, -2.06305535],
        [ 0.67613746,  0.65605984, -0.91611848, -0.08225785,
          0.10061148,  0.60595909]],

       [[-0.75634764, -4.0289572 , -2.71625864, -1.24123879,
          6.08447538, -1.78845029],
        [-2.42660109,  2.63713411, -1.945025  ,  1.53652764,
          3.05711747,  2.44563989],
        [ 2.84400599, -7.37769128, -0.56072504, -3.15083482,
         -0.484649  , -2.95117293]],

       [[-1.05016031,  3.59185185,  2.96334263,  1.79581238,
          1.14369894, -2.78185838],
        [-1.4758462 ,  0.44923287,  2.34544218,  1.2061219 ,
          0.95712489,  0.00515746],
        [-2.47194669,  1.17502862,  0.71935262,  0.38647812,
         -1.42678408, -0.14988427]],

       [[-3.90708734, -1.50393033,  0.89809925, -0.72344327,
          0.50707386, -4.46805616],
        [ 0.99705961,  0

## Level 3 — Transformers

### Exercise 10: QK^T attention



In [43]:
B,T,H,F = 2,5,4,8
Q=np.random.randn(B,T,H,F)
K=np.random.randn(B,T,H,F)
Q.shape

(2, 5, 4, 8)

In [46]:
# Your answer
ATT = np.einsum("bthf,bkhf->bhtk",Q,K) #wrong --- if the output doesnt have the letter this means it gets multiplied and summed (dot product over that layer)
ATT.shape

(2, 4, 5, 5)

### Exercise 11: Apply attention weights

In [47]:
V=np.random.randn(B,T,F)
attn=np.random.rand(B,T,T)

In [51]:
# Your answer
np.einsum("btk,bkf->btf",attn,V)

array([[[-0.8303848 , -0.04763211, -0.31696813, -2.13544413,
         -0.16372919,  1.15946063,  2.07678948,  0.16183983],
        [-1.22663352,  0.50011216, -0.27928876, -1.54192144,
         -0.25923862,  0.95137783,  1.72096259,  0.86906677],
        [ 0.08877125, -0.76846931, -0.34730686, -2.48462296,
          0.16884755,  1.04383583,  1.68062541, -0.67326969],
        [ 0.3519394 , -2.22919293,  0.53874363, -2.40298581,
         -1.1132836 ,  1.15248286,  0.74639116, -1.06632865],
        [-0.41215462, -0.38430125, -0.23957156, -1.18448192,
         -0.54306809,  1.0064461 ,  2.02586731, -1.2207006 ]],

       [[ 0.21567966, -0.05205103,  0.93120111, -1.46648235,
          1.78182763,  1.15155086,  1.25847268, -1.61574345],
        [ 1.42459726, -1.49308146, -0.3327133 , -1.54235529,
          0.21241892,  1.95554222,  2.32508865, -1.68886691],
        [ 0.90137169, -1.19974402, -0.09909224,  0.2945615 ,
          1.73685029,  1.0762941 ,  1.16793287, -2.19417153],
        [ 1.40

### Exercise 12: Rearrange multi-head outputs

In [53]:
X=np.random.randn(2,10,4,16)

In [114]:
# Your answer
final = np.einsum("bthf->bthf", X)  # placeholder, let's correct it properly

# Correct einsum:
final = np.einsum("bthf->bt(hf)", X)
print(final.shape)

#einsum cannot do this correctly

ValueError: invalid subscript '(' in einstein sum subscripts string, subscripts must be letters