In [1]:
import numpy as np
import pandas as pd
# Seed NumPy's global random generator so the np.random.randn draws in the
# cells below give the same numbers each time the notebook is run top to bottom.
np.random.seed(123)

In [2]:
# Practice-sized dimensions (shrunk from 256 down to 4).
d_model = 4      # length of every word / positional embedding vector
d_ff = 5         # not referenced by the cells shown in this part of the notebook
d_k = d_v = 3    # column counts of the W_q / W_k and W_v projection matrices

In [3]:
# Word embeddings: random placeholders, since there is no dataset yet.
# The generator is consumed left to right, so each name receives the same
# draw it would get from five separate np.random.randn(d_model) calls.
me_, gusta_, estudiar_, inteligencia_, artificial_ = (
    np.random.randn(d_model) for _ in range(5)
)

In [5]:
# Positional embeddings: one hand-written 4-element vector per token position.
pos_1, pos_2, pos_3, pos_4, pos_5 = (
    np.array(row)
    for row in (
        [0.3, -0.9, -0.6, 0.8],
        [-0.6, 0.8, -0.9, 0.15],
        [0.8, -0.5, -0.9, 0.5],
        [-0.95, 0.2, -0.4, -0.9],
        [0.95, 0.16, 0.2, -0.95],
    )
)

In [6]:
# Add each token's positional vector to its word embedding (element-wise).
(
    me_embed_pos,
    gusta_embed_pos,
    estudiar_embed_pos,
    inteligencia_embed_pos,
    artificial_embed_pos,
) = (
    word_vec + pos_vec
    for word_vec, pos_vec in zip(
        (me_, gusta_, estudiar_, inteligencia_, artificial_),
        (pos_1, pos_2, pos_3, pos_4, pos_5),
    )
)

In [7]:
# Projection matrices for queries, keys and values: random normal entries
# scaled by 0.1. Drawn in the order W_q, W_k, W_v so the random stream is
# consumed exactly as with three separate assignments.
W_q, W_k, W_v = (
    0.1 * np.random.randn(d_model, n_cols) for n_cols in (d_k, d_k, d_v)
)

In [8]:
# Project every position-aware embedding through W_q, W_k and W_v to get that
# token's query, key and value vectors (one unpacking per token).
me_query, me_key, me_value = (
    me_embed_pos @ W for W in (W_q, W_k, W_v)
)
gusta_query, gusta_key, gusta_value = (
    gusta_embed_pos @ W for W in (W_q, W_k, W_v)
)
estudiar_query, estudiar_key, estudiar_value = (
    estudiar_embed_pos @ W for W in (W_q, W_k, W_v)
)
inteligencia_query, inteligencia_key, inteligencia_value = (
    inteligencia_embed_pos @ W for W in (W_q, W_k, W_v)
)
artificial_query, artificial_key, artificial_value = (
    artificial_embed_pos @ W for W in (W_q, W_k, W_v)
)

In [10]:
# Similarities
def softmax(x, axis=1):
    """Return the softmax of ``x`` along ``axis`` without modifying ``x``.

    Parameters
    ----------
    x : array_like
        Scores to normalise. Nested lists are accepted and converted with
        ``np.asarray``.
    axis : int, optional
        Axis along which the outputs sum to 1. Defaults to 1 (each row of a
        2-D input). A 1-D input needs ``axis=0`` or ``axis=-1``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose entries are positive and sum
        to 1 along ``axis``.
    """
    x = np.asarray(x)
    # Subtracting the per-slice maximum keeps np.exp from overflowing and does
    # not change the result. A new array is built so the caller's data is
    # left untouched (the in-place ``x -= ...`` form overwrote it).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the normaliser
    return exps / exps.sum(axis=axis, keepdims=True)

In [13]:
# .T is the transpose. The key vectors here are 1-D, so .T leaves them as-is
# and each product below is a plain dot product between the query of "me"
# and one token's key.
(
    me_alpha_me,
    me_alpha_gusta,
    me_alpha_estudiar,
    me_alpha_inteligencia,
    me_alpha_artificial,
) = (
    me_query @ key.T
    for key in (me_key, gusta_key, estudiar_key, inteligencia_key, artificial_key)
)

# Pack the five scores into a single-row 2-D array and normalise that row into
# attention weights.
me_alphas = softmax(np.array([[
    me_alpha_me,
    me_alpha_gusta,
    me_alpha_estudiar,
    me_alpha_inteligencia,
    me_alpha_artificial,
]]))

In [16]:
# Attention output for "me": each token's value vector weighted by the matching
# entry of me_alphas and summed in token order; wrapped to give one row.
me_attention = np.array([
    sum(
        weight * value
        for weight, value in zip(
            me_alphas[0],
            (me_value, gusta_value, estudiar_value,
             inteligencia_value, artificial_value),
        )
    )
])

In [17]:
# Show the attention output computed for the token "me" (a single-row array).
me_attention


array([[0.21879221, 0.16280812, 0.11939328]])

In [20]:
# Stack the five position-aware embeddings as rows of one matrix
# (one row per token, d_model columns).
X = np.vstack([
    me_embed_pos,
    gusta_embed_pos,
    estudiar_embed_pos,
    inteligencia_embed_pos,
    artificial_embed_pos,
])

In [26]:
# Matrix form of the per-token projections: row i of Q, K and V holds the
# query, key and value of token i.
Q, K, V = (X @ W for W in (W_q, W_k, W_v))
print("Q\n",Q)
print("K\n",K)
print("V\n",V)

Q
 [[-1.43756640e-02 -6.59759165e-02  2.69417101e-01]
 [-7.63886211e-02 -6.72812458e-04  7.86092247e-02]
 [-1.86517395e-01  6.94561065e-01 -1.97482288e-01]
 [ 2.67433306e-02  2.90426435e-01  3.62577608e-01]
 [ 6.66457559e-01  1.85969456e-02 -3.04086030e-01]]
K
 [[ 2.87394374e-01  7.36296509e-02 -8.11605067e-02]
 [ 5.07007664e-01 -2.65817462e-04  3.11318122e-01]
 [-2.73408077e-01 -2.05603482e-01  2.47959689e-01]
 [ 2.16475020e-01 -9.79340587e-03  1.14112445e-02]
 [-6.08324603e-01 -1.64018766e-01  3.24889383e-01]]
V
 [[ 0.07481096  0.0412923  -0.29696888]
 [ 0.51407634  0.58606888 -0.34319929]
 [ 0.18549254  0.02900396 -0.04362257]
 [ 0.21591613  0.04011366 -0.25856286]
 [ 0.092939    0.0992055   1.42992098]]


In [23]:
# Attention weights for all tokens at once: entry (i, j) of Q @ K.T is the dot
# product of query i with key j, and softmax normalises each row.
# NOTE(review): the scores are not divided by sqrt(d_k) as in standard scaled
# dot-product attention - confirm this simplification is intentional.
alphas = softmax(np.matmul(Q, K.T))
print(alphas)

[[0.18465934 0.20560799 0.20719656 0.1905618  0.21197432]
 [0.19215574 0.19488943 0.20586176 0.19462152 0.21247155]
 [0.21907906 0.18486734 0.18774642 0.20570368 0.2026035 ]
 [0.1915041  0.21737526 0.19600622 0.19293412 0.20218031]
 [0.24656792 0.25297203 0.152721   0.22830251 0.11943654]]


In [24]:
# Weighted sum of the value vectors for every token. Despite the name, this is
# the attention output rather than the raw scores; its first row matches the
# me_attention result computed token by token earlier.
attn_scores = np.matmul(alphas, V)
print(attn_scores)

[[ 0.21879221  0.16280812  0.11939328]
 [ 0.21451804  0.15700932  0.12056522]
 [ 0.20949551  0.15118757  0.09982367]
 [ 0.2228798   0.16878617  0.09919218]
 [ 0.23721603  0.18387671 -0.05495064]]
