<a href="https://colab.research.google.com/github/jcmachicao/MachineLearningAvanzado_UC_2024/blob/main/UC__MLADV__LLMSimulation_JCM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

In [2]:
def scaled_dot_product_attention(query, key, value):
    """Scaled dot-product attention: ``softmax(Q @ K^T / sqrt(d_k)) @ V``.

    Works on plain 2-D matrices as well as on stacks of matrices: any leading
    (batch / head) axes are broadcast by ``np.matmul``, and only the last two
    axes of ``key`` are transposed.

    Parameters
    ----------
    query : np.ndarray
        Array of shape ``(..., n_queries, d_k)``.
    key : np.ndarray
        Array of shape ``(..., n_keys, d_k)``.
    value : np.ndarray
        Array of shape ``(..., n_keys, d_v)``.

    Returns
    -------
    attention_weights : np.ndarray
        Shape ``(..., n_queries, n_keys)``; normalised by ``softmax`` over the
        last axis.
    attended_values : np.ndarray
        Shape ``(..., n_queries, d_v)``; the weighted sum of ``value`` rows.
    """
    # Swap only the last two axes: ``key.T`` would reverse *every* axis and
    # scramble batched input. 1-D keys are left untouched, as ``.T`` would do.
    key_transposed = np.swapaxes(key, -1, -2) if np.ndim(key) >= 2 else key

    # Raw similarity scores, scaled by sqrt(d_k) taken from the query's last axis.
    scaling_factor = np.sqrt(query.shape[-1])
    scaled_scores = np.matmul(query, key_transposed) / scaling_factor

    # Turn scores into weights, then mix the values with those weights.
    attention_weights = softmax(scaled_scores)
    attended_values = np.matmul(attention_weights, value)

    return attention_weights, attended_values

def softmax(x):
    """Numerically stable softmax over the last axis of ``x``.

    Parameters
    ----------
    x : array_like
        Scores; each slice along the last axis is normalised independently.

    Returns
    -------
    np.ndarray
        Same shape as ``x``; every slice along the last axis sums to 1.
    """
    x = np.asarray(x)
    # Shift by the maximum of *each* slice, not the global maximum: softmax is
    # shift-invariant per slice, and a global shift lets a slice that lies far
    # below the overall maximum underflow to all zeros, producing 0/0 = NaN.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / exp_x.sum(axis=-1, keepdims=True)

In [3]:
# Toy input "paragraph": a 3x3 matrix of embedding values
# (presumably one row per token and one column per embedding dimension -- TODO confirm).
paragraph = np.asarray(
    [
        (0.1, 0.2, 0.3),
        (0.4, 0.5, 0.6),
        (0.7, 0.8, 0.9),
    ],
    dtype=np.float64,
)

In [4]:
# Multi-head configuration: the embedding (last) axis is divided among the heads.
num_heads = 2
# Nominal per-head width (floor division); the split below does not use it.
head_size = paragraph.shape[-1] // num_heads

# Self-attention: queries, keys and values are all column slices of the same matrix.
# np.array_split accepts a width that is not a multiple of num_heads, in which case
# the heads end up with unequal widths (3 columns over 2 heads here).
query_heads, key_heads, value_heads = (
    np.array_split(paragraph, num_heads, axis=-1) for _ in range(3)
)

# Run attention independently on every head; each call returns (weights, attended values).
head_results = [
    scaled_dot_product_attention(q, k, v)
    for q, k, v in zip(query_heads, key_heads, value_heads)
]
attention_weights_heads = [weights for weights, _ in head_results]
output_heads = [attended for _, attended in head_results]

# Join the head outputs side by side along the feature axis, and stack the
# per-head weight matrices on top of each other (head 0 rows first).
output = np.concatenate(output_heads, axis=-1)
attention_weights = np.concatenate(attention_weights_heads, axis=0)

In [6]:
# Report the input, the concatenated head outputs and the stacked attention weights.
# Each heading is printed on its own line, followed by the corresponding array.
for heading, array in (
    ("Original Paragraph:", paragraph),
    ("\nMultihead Attended Output:", output),
    ("\nAttention Weights:", attention_weights),
):
    print(heading)
    print(array)

Original Paragraph:
[[0.1 0.2 0.3]
 [0.4 0.5 0.6]
 [0.7 0.8 0.9]]

Multihead Attended Output:
[[0.41271934 0.51271934 0.61797574]
 [0.43795362 0.53795362 0.63580695]
 [0.46258873 0.56258873 0.6533541 ]]

Attention Weights:
[[0.31235921 0.33288379 0.354757  ]
 [0.27208401 0.32931993 0.39859606]
 [0.2345047  0.32236152 0.44313378]
 [0.30382285 0.33243515 0.36374199]
 [0.2754406  0.32976227 0.39479712]
 [0.24838726 0.32537849 0.42623425]]
