In [1]:
import numpy as np

def softmax(x, axis=None):
    """Compute softmax values for each set of scores in x.

    Args:
        x: Array-like of scores (list, tuple or ndarray).
        axis: Axis (or tuple of axes) along which one "set of scores" lies.
            The default ``None`` normalizes over every element of ``x`` at
            once, which is the right choice for a single 1-D score vector.
            Pass e.g. ``axis=-1`` to normalize each row of a 2-D batch
            independently.

    Returns:
        Array with the same shape as ``x`` whose entries are non-negative
        and sum to 1 along ``axis``.
    """
    x = np.asarray(x)
    # Subtract the max for numerical stability: exp() then never sees a
    # positive argument, so large scores cannot overflow. keepdims=True
    # keeps the reduced axis so the result broadcasts back against x.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)

# --- 1. Feature vectors, laid out as [Volatility, Trend, Panic] ---
query_today = np.array([10, -5, 8])   # Today's market: high stress
key_2008    = np.array([ 9, -6, 9])   # 2008 regime: high stress (close to today)
key_2017    = np.array([ 2, -1, 1])   # 2017 regime: low stress (unlike today)

# Outcome stored alongside each key (e.g., next day return)
value_2008  = np.array([-10.0]) # The crash continued
value_2017  = np.array([  5.0]) # The market rebounded

# --- 2. Step A: raw attention scores ---
# Stack the keys as rows so a single matrix-vector product yields the
# dot product of today's query with every historical key.
key_matrix = np.stack([key_2008, key_2017])
scores = key_matrix @ query_today
score_2008, score_2017 = scores   # similarity of today to 2008 / to 2017
print(f"Raw Similarity Scores: {scores}")
# Prints [192  33]

# --- 3. Step B: turn the scores into weights that sum to 1 ---
weights = softmax(scores)
print(f"Attention Weights: {weights}")
# The gap between the two scores is so large that essentially all of the
# weight lands on 2008; the 2017 weight is vanishingly small.

# --- 4. Step C: context = attention-weighted sum of the values ---
# This is the 'Fusion' vector passed to the final prediction layer.
value_matrix = np.stack([value_2008, value_2017])
context = weights @ value_matrix

print(f"Final Model Context: {context}")
# Prints [-10.]: the 2017 rebound contributes nothing measurable.

Raw Similarity Scores: [192  33]
Attention Weights: [1.00000000e+00 8.85477188e-70]
Final Model Context: [-10.]
