# 4. Mécanisme d’Attention

Implémentation en NumPy de l’attention « scaled dot-product » : $\mathrm{Attention}(Q, K, V) = \mathrm{softmax}\!\left(\dfrac{QK^\top}{\sqrt{d_k}}\right)V$.

In [None]:
import numpy as np

def softmax(x, axis=-1):
    """Compute the softmax of ``x`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating. This
    does not change the mathematical result but keeps ``np.exp`` from
    overflowing on large inputs.

    Args:
        x: Array of scores.
        axis: Axis along which to normalize (defaults to the last one).

    Returns:
        Array shaped like ``x`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    shifted = x - np.max(x, axis=axis, keepdims=True)
    numerators = np.exp(shifted)
    denominators = np.sum(numerators, axis=axis, keepdims=True)
    return numerators / denominators

def scaled_dot_product_attention(Q, K, V, mask=None):
    """Compute scaled dot-product attention: ``softmax(Q K^T / sqrt(d_k)) V``.

    Only the last two axes of each input are treated as matrices, so the
    function accepts plain 2-D inputs as well as inputs carrying leading
    batch/head axes (which are broadcast against each other).

    Args:
        Q: Queries, shape ``(..., n_q, d_k)``.
        K: Keys, shape ``(..., n_k, d_k)``.
        V: Values, shape ``(..., n_k, d_v)``.
        mask: Optional array-like broadcastable to ``(..., n_q, n_k)``.
            Non-zero (or ``True``) entries mark key positions a query may
            attend to; zero (or ``False``) entries are suppressed.

    Returns:
        Array of shape ``(..., n_q, d_v)``: for each query, the
        attention-weighted sum of the value rows.
    """
    d_k = Q.shape[-1]
    # Transpose only the last two axes of K: `K.T` would reverse *all* axes
    # and `np.dot` does not batch, so both break on inputs with batch axes.
    scores = np.matmul(Q, np.swapaxes(K, -1, -2)) / np.sqrt(d_k)
    if mask is not None:
        # Masked positions get a large negative score, so they receive
        # (numerically) zero weight after the softmax. Selecting with
        # `np.where` instead of multiplying by the mask avoids NaN when a
        # masked score is itself inf/NaN. A row that is masked everywhere
        # ends up with uniform weights.
        scores = np.where(np.asarray(mask, dtype=bool), scores, -1e9)
    weights = softmax(scores, axis=-1)
    return np.matmul(weights, V)

# Example: causal self-attention over a short random sequence
np.random.seed(42)  # fixed seed so the example is reproducible
seq_len, d_k, d_v = 4, 8, 8
Q = np.random.standard_normal((seq_len, d_k))
K = np.random.standard_normal((seq_len, d_k))
V = np.random.standard_normal((seq_len, d_v))

# Causal mask: lower-triangular matrix of ones (diagonal included), so
# row i only keeps columns j <= i
mask = np.tri(seq_len)
out = scaled_dot_product_attention(Q, K, V, mask)
print("Forme de la sortie :", out.shape)