# Algorithm 9: Triangle Attention (Boltz)

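Attention mechanism for pair representations: gated multi-head self-attention applied along the rows (starting node) or columns (ending node) of the pair tensor, with an attention bias projected from the pair representation itself.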

## Source Code Location
- **File**: `Boltz-Ref-src/boltz-official/src/boltz/model/layers/triangular_attention/`

In [None]:
import numpy as np
# Seed NumPy's global RNG so the np.random draws made by the cells below repeat from run to run.
np.random.seed(42)

def layer_norm(x, eps=1e-5):
    """
    Standardise ``x`` along its last axis.

    Each slice along the final axis is shifted to zero mean and scaled by
    ``1 / sqrt(variance + eps)``. No learned scale or offset is applied.
    """
    centered = x - np.mean(x, axis=-1, keepdims=True)
    # eps keeps the denominator away from zero for constant slices
    spread = np.sqrt(np.var(x, axis=-1, keepdims=True) + eps)
    return centered / spread

def softmax(x, axis=-1):
    """
    Softmax of ``x`` along ``axis``.

    The per-slice maximum is subtracted before exponentiating so that large
    inputs do not overflow; the result along ``axis`` sums to 1.
    """
    shifted = x - np.max(x, axis=axis, keepdims=True)
    weights = np.exp(shifted)
    normaliser = np.sum(weights, axis=axis, keepdims=True)
    return weights / normaliser

def sigmoid(x):
    """
    Logistic function ``1 / (1 + exp(-x))``, applied element-wise.

    The input is clipped to [-500, 500] before exponentiating.
    """
    bounded = np.clip(x, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

In [None]:
def triangle_attention_starting(z, num_heads=4, c=32):
    """
    Triangle Attention (Starting Node).

    Row-wise gated multi-head self-attention over the pair representation.
    Within each row ``i``, entry ``(i, j)`` attends over all entries
    ``(i, k)``; the logit for that (j, k) pair is biased by a per-head linear
    projection of ``z[j, k]``, i.e. the third edge of the triangle i-j-k.

    All projection weights are sampled from ``np.random`` on every call, so
    this demonstrates the data flow and shapes rather than a trained layer.
    Prints a header and the output shape as a side effect.

    Args:
        z: Square pair representation of shape (N, N, c_z).
        num_heads: Number of attention heads.
        c: Channel dimension per head.

    Returns:
        Array of shape (N, N, c_z).
    """
    c_z = z.shape[-1]

    print("Triangle Attention (Starting)")
    print("=" * 50)

    z_norm = layer_norm(z)

    # Randomly initialised projections. The draw order (q, k, v, gate, bias,
    # then output) is kept fixed so a seeded run stays reproducible.
    in_scale = c_z ** -0.5
    W_q = np.random.randn(c_z, num_heads, c) * in_scale
    W_k = np.random.randn(c_z, num_heads, c) * in_scale
    W_v = np.random.randn(c_z, num_heads, c) * in_scale
    W_g = np.random.randn(c_z, num_heads, c) * in_scale
    W_b = np.random.randn(c_z, num_heads) * in_scale

    # Per-head projections, each of shape (N, N, num_heads, c)
    q = np.einsum('ijc,chd->ijhd', z_norm, W_q)
    k = np.einsum('ijc,chd->ijhd', z_norm, W_k)
    v = np.einsum('ijc,chd->ijhd', z_norm, W_v)
    g = sigmoid(np.einsum('ijc,chd->ijhd', z_norm, W_g))
    # Triangle bias, shape (N, N, num_heads), indexed [j, k, h]
    b = np.einsum('ijc,ch->ijh', z_norm, W_b)

    # Attention: q[i,j] attends to k[i,k]; logits have shape (i, j, k, h)
    attn = np.einsum('ijhd,ikhd->ijkh', q, k) / np.sqrt(c)
    # The logit for (i, j, k) must receive b[j, k]. Adding a leading axis
    # lines b's [j, k, h] axes up with attn's trailing [j, k, h] axes and
    # broadcasts over rows i. (Swapping j and k here would add b[k, j].)
    attn = attn + b[None, :, :, :]
    attn = softmax(attn, axis=2)  # normalise over the attended index k

    # Weighted sum of values, gated element-wise per head and channel
    output = np.einsum('ijkh,ikhd->ijhd', attn, v) * g

    # Merge heads and project back to c_z channels
    W_o = np.random.randn(num_heads, c, c_z) * ((num_heads * c) ** -0.5)
    output = np.einsum('ijhd,hdc->ijc', output, W_o)

    print(f"Output: {output.shape}")
    return output

def triangle_attention_ending(z, num_heads=4, c=32):
    """
    Triangle Attention (Ending Node).

    Column-wise attention, computed by swapping the two pair axes of ``z``,
    running ``triangle_attention_starting`` on the result, and swapping the
    axes of its output back. Prints a header as a side effect (in addition
    to whatever the starting-node function prints).

    Args:
        z: Pair representation with three axes; the first two are swapped.
        num_heads: Number of attention heads, forwarded unchanged.
        c: Channel dimension per head, forwarded unchanged.

    Returns:
        The starting-node output with its first two axes swapped back.
    """
    print("Triangle Attention (Ending)")
    print("=" * 50)

    # Transpose, apply starting node attention, transpose back
    z_t = z.transpose(1, 0, 2)
    output_t = triangle_attention_starting(z_t, num_heads, c)
    return output_t.transpose(1, 0, 2)

In [None]:
# Smoke test: run both attention variants on one random pair tensor and
# check that every output value is finite.
print("Test: Triangle Attention")
print("=" * 60)

N, c_z = 24, 64

# Small-magnitude random pair representation of shape (N, N, c_z)
z = 0.1 * np.random.randn(N, N, c_z)

# Row-wise (starting node) variant
out_start = triangle_attention_starting(z, num_heads=4, c=16)
print(f"Starting finite: {np.all(np.isfinite(out_start))}")

print()

# Column-wise (ending node) variant on the same input
out_end = triangle_attention_ending(z, num_heads=4, c=16)
print(f"Ending finite: {np.all(np.isfinite(out_end))}")

## Key Insights

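1. **Row/Column Attention**: The starting-node variant attends along rows (entry `(i, j)` attends over entries `(i, k)`); the ending-node variant attends along columns, implemented here by transposing the pair tensor, applying the starting-node variant, and transposing back.
2. **Pair Bias**: A per-head linear projection of the pair representation is added to the attention logits before the softmax.
3. **Gating**: The attention output is multiplied element-wise by a sigmoid gate computed from the normalized input.
4. **Complementary**: Using both the row-wise and column-wise variants lets every pair entry exchange information along both of its axes.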