In [17]:
import numpy as np

**Step 1: Create the embedding representation of our sentence**

In [18]:
# Toy 4-dimensional embeddings: each token gets one dominant dimension.
word_embeddings = {
    token: np.array(vector)
    for token, vector in (
        ('she',    [0.9, 0.2, 0.1, 0.1]),  # first dim dominant  -> subject
        ('likes',  [0.1, 0.9, 0.2, 0.1]),  # second dim dominant -> verb
        ('coffee', [0.1, 0.2, 0.9, 0.1]),  # third dim dominant  -> object
    )
}

In [19]:
# Display every token alongside its embedding vector.
print("Step 1 - Word Embeddings")
for word in word_embeddings:
    embed = word_embeddings[word]
    print(f"{word}: {embed}")

Step 1 - Word Embeddings
she: [0.9 0.2 0.1 0.1]
likes: [0.1 0.9 0.2 0.1]
coffee: [0.1 0.2 0.9 0.1]


**Step 2: Create input matrix X**

In [20]:
# Stack the embeddings row-wise in sentence order: one row per token.
sentence = ('she', 'likes', 'coffee')
X = np.vstack([word_embeddings[token] for token in sentence])
X

array([[0.9, 0.2, 0.1, 0.1],
       [0.1, 0.9, 0.2, 0.1],
       [0.1, 0.2, 0.9, 0.1]])

In [21]:
# Echo the input matrix under its step heading (heading and matrix on separate lines).
print("\nStep 2: Input Matrix X", X, sep="\n")


Step 2: Input Matrix X
[[0.9 0.2 0.1 0.1]
 [0.1 0.9 0.2 0.1]
 [0.1 0.2 0.9 0.1]]


**Step 3: Initialization of W_q, W_k, W_v**

In [22]:
# Query and key projections share the same hand-picked values:
# 0.9 on the diagonal and 0.1 everywhere else (a slightly "leaky" identity).
diagonal_mask = np.eye(4, dtype=bool)
W_q = np.where(diagonal_mask, 0.9, 0.1)
W_k = W_q.copy()  # same numbers, but an independent array

# The value projection mixes neighbouring dimensions a little more strongly.
W_v = np.array([
    [0.8, 0.2, 0.1, 0.1],
    [0.2, 0.8, 0.2, 0.1],
    [0.1, 0.2, 0.8, 0.1],
    [0.1, 0.1, 0.1, 0.9],
])
W_q, W_k, W_v

(array([[0.9, 0.1, 0.1, 0.1],
        [0.1, 0.9, 0.1, 0.1],
        [0.1, 0.1, 0.9, 0.1],
        [0.1, 0.1, 0.1, 0.9]]),
 array([[0.9, 0.1, 0.1, 0.1],
        [0.1, 0.9, 0.1, 0.1],
        [0.1, 0.1, 0.9, 0.1],
        [0.1, 0.1, 0.1, 0.9]]),
 array([[0.8, 0.2, 0.1, 0.1],
        [0.2, 0.8, 0.2, 0.1],
        [0.1, 0.2, 0.8, 0.1],
        [0.1, 0.1, 0.1, 0.9]]))

In [23]:
# Project the same input X through each weight matrix to obtain queries, keys and values.
Q, K, V = (X.dot(weights) for weights in (W_q, W_k, W_v))
Q, K, V

(array([[0.85, 0.29, 0.21, 0.21],
        [0.21, 0.85, 0.29, 0.21],
        [0.21, 0.29, 0.85, 0.21]]),
 array([[0.85, 0.29, 0.21, 0.21],
        [0.21, 0.85, 0.29, 0.21],
        [0.21, 0.29, 0.85, 0.21]]),
 array([[0.78, 0.37, 0.22, 0.21],
        [0.29, 0.79, 0.36, 0.21],
        [0.22, 0.37, 0.78, 0.21]]))

**Step 4: Q, K, V matrices**

In [24]:
# Print each projected matrix under its own heading, rounded to 3 decimals.
print("\nStep 4: Q, K, V matrices")
for heading, matrix in (
    ("Q (Query matrix):", Q),
    ("\nK (Key matrix):", K),
    ("\nV (Value matrix):", V),
):
    print(heading)
    print(np.round(matrix, 3))


Step 4: Q, K, V matrices
Q (Query matrix):
[[0.85 0.29 0.21 0.21]
 [0.21 0.85 0.29 0.21]
 [0.21 0.29 0.85 0.21]]

K (Key matrix):
[[0.85 0.29 0.21 0.21]
 [0.21 0.85 0.29 0.21]
 [0.21 0.29 0.85 0.21]]

V (Value matrix):
[[0.78 0.37 0.22 0.21]
 [0.29 0.79 0.36 0.21]
 [0.22 0.37 0.78 0.21]]


**Step 5: Compute attention scores (Q × K^T)**

In [25]:
# Entry (i, j) is the dot product of query i with key j.
scores = Q.dot(K.T)
print("\nStep 5: Raw attention scores (Q × K^T)")
print(scores.round(3))


Step 5: Raw attention scores (Q × K^T)
[[0.895 0.53  0.485]
 [0.53  0.895 0.581]
 [0.485 0.581 0.895]]


**Step 6: Scale the scores**

In [26]:
# Scale the raw scores by the square root of the key dimensionality.
d_k = K.shape[-1]  # number of columns in K, i.e. the length of each key vector
scaled_scores = np.divide(scores, np.sqrt(d_k))
print("\nStep 6: Scaled attention scores (divided by sqrt(d_k))")
print(scaled_scores.round(3))


Step 6: Scaled attention scores (divided by sqrt(d_k))
[[0.447 0.265 0.243]
 [0.265 0.447 0.291]
 [0.243 0.291 0.447]]


**Step 7: Apply softmax to get attention weights**

In [27]:
# Row-wise softmax: every row of scaled_scores becomes a set of non-negative
# weights that sum to 1.
# Subtracting each row's maximum before exponentiating leaves the result
# mathematically unchanged (the common factor cancels in the ratio) but keeps
# np.exp from overflowing to inf -- and the weights from turning into nan --
# when the scores are large.
shifted_scores = scaled_scores - scaled_scores.max(axis=1, keepdims=True)
exp_scores = np.exp(shifted_scores)
attention_weights = exp_scores / exp_scores.sum(axis=1, keepdims=True)
print("\nStep 7: Attention weights after softmax")
print(np.round(attention_weights, 3))


Step 7: Attention weights after softmax
[[0.378 0.315 0.308]
 [0.31  0.372 0.318]
 [0.305 0.32  0.375]]


**Step 8: Compute final output**

In [28]:
# Each output row is a mix of the rows of V, weighted by that token's attention weights.
output = attention_weights.dot(V)
print("\nStep 8: Final output")
print(output.round(3))


Step 8: Final output
[[0.454 0.502 0.436 0.21 ]
 [0.42  0.526 0.45  0.21 ]
 [0.413 0.504 0.475 0.21 ]]


**Visualization of attention weights**

In [32]:
# Plain-text bar chart: one group per attending word, one bar per attended word.
print("\nAttention Weight Visualization:")
words = ['she', 'likes', 'coffee']
for word1, weights_row in zip(words, attention_weights):
    print(f"\n{word1} attending to:")
    for word2, weight in zip(words, weights_row):
        # 20 characters correspond to a weight of 1.0; int() truncates the bar length.
        bars = '█' * int(weight * 20)
        print(f"{word2:>8}: {bars} ({weight:.3f})")


Attention Weight Visualization:

she attending to:
     she: ███████ (0.378)
   likes: ██████ (0.315)
  coffee: ██████ (0.308)

likes attending to:
     she: ██████ (0.310)
   likes: ███████ (0.372)
  coffee: ██████ (0.318)

coffee attending to:
     she: ██████ (0.305)
   likes: ██████ (0.320)
  coffee: ███████ (0.375)


In [34]:
!pip install colorama
from colorama import Fore, Style

words = ['she', 'likes', 'coffee']

def get_color(weight):
    """Pick a colorama foreground color that reflects how large a weight is.

    Returns Fore.GREEN when weight is above 0.5, Fore.YELLOW when it is above
    0.3 (but not above 0.5), and Fore.RED for everything else.
    """
    # Thresholds are checked from highest to lowest; the first one exceeded wins.
    for threshold, color in ((0.5, Fore.GREEN), (0.3, Fore.YELLOW)):
        if weight > threshold:
            return color
    return Fore.RED

# Colored bar chart: one group per attending word, one colored bar per attended word.
print("\nAttention Weight Visualization:\n")
for word1, weights_row in zip(words, attention_weights):
    print(f"{Style.BRIGHT}{word1} attending to:{Style.RESET_ALL}")
    for word2, weight in zip(words, weights_row):
        color = get_color(weight)
        # 20 characters correspond to a weight of 1.0; int() truncates the bar length.
        bars = '█' * int(weight * 20)
        print(f"{word2:>8}: {color}{bars}{Style.RESET_ALL} ({weight:.3f})")
    print("\n" + "-" * 30)  # Separator between words


Collecting colorama
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama
Successfully installed colorama-0.4.6

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

Attention Weight Visualization:

[1mshe attending to:[0m
     she: [33m███████[0m (0.378)
   likes: [33m██████[0m (0.315)
  coffee: [33m██████[0m (0.308)

------------------------------
[1mlikes attending to:[0m
     she: [33m██████[0m (0.310)
   likes: [33m███████[0m (0.372)
  coffee: [33m██████[0m (0.318)

------------------------------
[1mcoffee attending to:[0m
     she: [33m██████[0m (0.305)
   likes: [33m██████[0m (0.320)
  coffee: [33m███████[0m (0.375)

------------------------------


ValueError: axes don't match array