# Algorithm 7: Pairformer Module (Boltz)

Main representation learning module using triangle operations and attention.

## Source Code Location
- **File**: `Boltz-Ref-src/boltz-official/src/boltz/model/modules/trunk.py`
- **Class**: `PairformerModule`

In [None]:
import numpy as np
# Seed NumPy's global RNG so the np.random.randn draws used for weights and
# test inputs in this notebook are reproducible from run to run.
np.random.seed(42)

def layer_norm(x, eps=1e-5):
    """Standardize ``x`` along its last axis (no learned scale or shift).

    Args:
        x: Input array; mean and variance are taken over the last axis.
        eps: Constant added to the variance before taking the square root.

    Returns:
        Array of the same shape as ``x`` with the last axis centered and
        divided by ``sqrt(var + eps)``.
    """
    centered = x - np.mean(x, axis=-1, keepdims=True)
    variance = np.var(x, axis=-1, keepdims=True)
    # eps keeps the denominator non-zero for constant rows.
    return centered / np.sqrt(variance + eps)

def softmax(x, axis=-1):
    """Compute a softmax over ``axis``.

    Args:
        x: Input array of scores.
        axis: Axis along which the outputs sum to one.

    Returns:
        Array of the same shape as ``x``.
    """
    # Subtracting the per-slice maximum leaves the result unchanged but keeps
    # the exponentials from overflowing.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    weights = np.exp(shifted)
    return weights / np.sum(weights, axis=axis, keepdims=True)

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The input is clipped to ``[-500, 500]`` first so the exponential stays
    within floating-point range.
    """
    bounded = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-bounded))

def swish(x):
    """Return ``x * sigmoid(x)`` element-wise (the Swish/SiLU activation)."""
    gate = sigmoid(x)
    return x * gate

In [None]:
def triangle_mult_outgoing(z, c=64):
    """Triangle multiplicative update, "outgoing" variant.

    Weights are drawn fresh from ``np.random.randn`` on every call, so this is
    a shape/flow demonstration rather than a trained layer.

    Args:
        z: Pair array indexed as (i, j, channel). The (i, i) result is gated
            by an array shaped like ``z``, so the first two axes are expected
            to have equal length.
        c: Hidden channel count of the two projections.

    Returns:
        Gated update with ``z.shape[-1]`` channels.
    """
    c_z = z.shape[-1]
    normed = layer_norm(z)
    in_scale = c_z ** -0.5

    # Draw order (a, b, gate, then output) is kept fixed so results under a
    # given seed do not change.
    w_left = np.random.randn(c_z, c) * in_scale
    w_right = np.random.randn(c_z, c) * in_scale
    w_gate = np.random.randn(c_z, c_z) * in_scale

    # Each projection is self-gated: sigmoid(p) * p.
    left_lin = normed @ w_left
    right_lin = normed @ w_right
    left = sigmoid(left_lin) * left_lin
    right = sigmoid(right_lin) * right_lin
    out_gate = sigmoid(normed @ w_gate)

    # Outgoing edges: combine rows i and j over the shared second index k.
    combined = layer_norm(np.einsum('ikc,jkc->ijc', left, right))

    w_out = np.random.randn(c, c_z) * (c ** -0.5)
    return (combined @ w_out) * out_gate

def triangle_mult_incoming(z, c=64):
    """Triangle multiplicative update, "incoming" variant.

    Weights are drawn fresh from ``np.random.randn`` on every call, so this is
    a shape/flow demonstration rather than a trained layer.

    Args:
        z: Pair array indexed as (i, j, channel). The (j, j)-shaped result is
            gated by an array shaped like ``z``, so the first two axes are
            expected to have equal length.
        c: Hidden channel count of the two projections.

    Returns:
        Gated update with ``z.shape[-1]`` channels.
    """
    c_z = z.shape[-1]
    normed = layer_norm(z)
    in_scale = c_z ** -0.5

    # Draw order (a, b, gate, then output) is kept fixed so results under a
    # given seed do not change.
    w_left = np.random.randn(c_z, c) * in_scale
    w_right = np.random.randn(c_z, c) * in_scale
    w_gate = np.random.randn(c_z, c_z) * in_scale

    # Each projection is self-gated: sigmoid(p) * p.
    left_lin = normed @ w_left
    right_lin = normed @ w_right
    left = sigmoid(left_lin) * left_lin
    right = sigmoid(right_lin) * right_lin
    out_gate = sigmoid(normed @ w_gate)

    # Incoming edges: combine columns i and j over the shared first index k.
    combined = layer_norm(np.einsum('kic,kjc->ijc', left, right))

    w_out = np.random.randn(c, c_z) * (c ** -0.5)
    return (combined @ w_out) * out_gate

In [None]:
def triangle_attention(z, num_heads=4, c=16):
    """Simplified gated multi-head attention along each row of ``z``.

    For every first index ``i``, position ``j`` attends over positions ``k``
    in the same row. Weights are drawn from ``np.random.randn`` on every call.

    Args:
        z: Pair array indexed as (i, j, channel).
        num_heads: Number of attention heads.
        c: Per-head channel count.

    Returns:
        Array with the same leading axes as ``z`` and ``z.shape[-1]`` channels.
    """
    c_z = z.shape[-1]
    normed = layer_norm(z)
    in_scale = c_z ** -0.5
    head_shape = (c_z, num_heads, c)

    # Drawn in the order query, key, value, gate to keep seeded results stable.
    w_query = np.random.randn(*head_shape) * in_scale
    w_key = np.random.randn(*head_shape) * in_scale
    w_value = np.random.randn(*head_shape) * in_scale
    w_gate = np.random.randn(*head_shape) * in_scale

    def project(weight):
        # Map channels to (head, per-head channel) for every (i, j) entry.
        return np.einsum('ijc,chd->ijhd', normed, weight)

    queries = project(w_query)
    keys = project(w_key)
    values = project(w_value)
    gate = sigmoid(project(w_gate))

    # Scaled dot-product scores, normalized over the attended index k.
    scores = np.einsum('ijhd,ikhd->ijkh', queries, keys) / np.sqrt(c)
    probs = softmax(scores, axis=2)

    attended = np.einsum('ijkh,ikhd->ijhd', probs, values) * gate

    w_out = np.random.randn(num_heads, c, c_z) * ((num_heads * c) ** -0.5)
    return np.einsum('ijhd,hdc->ijc', attended, w_out)

In [None]:
def single_attention_pair_bias(s, z, num_heads=8, c=16):
    """Multi-head self-attention over ``s`` with an additive bias from ``z``.

    Weights are drawn from ``np.random.randn`` on every call.

    Args:
        s: Single array indexed as (position, channel).
        z: Pair array indexed as (i, j, channel); projected to one bias value
            per head for every (i, j).
        num_heads: Number of attention heads.
        c: Per-head channel count.

    Returns:
        Array shaped like ``s``.
    """
    c_s = s.shape[-1]
    c_z = z.shape[-1]

    single_normed = layer_norm(s)
    pair_normed = layer_norm(z)

    single_scale = c_s ** -0.5
    head_shape = (c_s, num_heads, c)

    # Drawn in the order query, key, value, bias to keep seeded results stable.
    w_query = np.random.randn(*head_shape) * single_scale
    w_key = np.random.randn(*head_shape) * single_scale
    w_value = np.random.randn(*head_shape) * single_scale
    w_bias = np.random.randn(c_z, num_heads) * (c_z ** -0.5)

    queries = np.einsum('ic,chd->ihd', single_normed, w_query)
    keys = np.einsum('jc,chd->jhd', single_normed, w_key)
    values = np.einsum('jc,chd->jhd', single_normed, w_value)
    pair_bias = np.einsum('ijc,ch->ijh', pair_normed, w_bias)

    # Scaled dot-product scores plus the pair bias, normalized over keys j.
    scores = np.einsum('ihd,jhd->ijh', queries, keys) / np.sqrt(c) + pair_bias
    probs = softmax(scores, axis=1)

    attended = np.einsum('ijh,jhd->ihd', probs, values)

    w_out = np.random.randn(num_heads, c, c_s) * ((num_heads * c) ** -0.5)
    return np.einsum('ihd,hdc->ic', attended, w_out)

In [None]:
def transition(x, n=4):
    """Feed-forward transition with a SwiGLU nonlinearity.

    Weights are drawn from ``np.random.randn`` on every call.

    Args:
        x: Input array; the last axis is the channel axis.
        n: Expansion factor for the hidden width (``channels * n``).

    Returns:
        Array shaped like ``x``.
    """
    c = x.shape[-1]
    hidden_width = c * n
    normed = layer_norm(x)

    # One up-projection produces both halves of the gated unit.
    w_up = np.random.randn(c, hidden_width * 2) * (c ** -0.5)
    activated_half, linear_half = np.split(normed @ w_up, 2, axis=-1)
    gated = swish(activated_half) * linear_half

    w_down = np.random.randn(hidden_width, c) * (hidden_width ** -0.5)
    return gated @ w_down

In [None]:
def pairformer_block(s, z):
    """Apply one Pairformer block as a chain of residual updates.

    The pair array is updated first (outgoing and incoming triangle
    multiplication, triangle attention, transition); the single array is then
    updated using the already-updated pair array.

    Args:
        s: Single array indexed as (position, channel).
        z: Pair array indexed as (i, j, channel).

    Returns:
        Tuple ``(s, z)`` of the updated arrays.
    """
    # Pair track: each sub-layer sees the output of the previous one.
    pair_sublayers = (
        lambda p: triangle_mult_outgoing(p, c=32),
        lambda p: triangle_mult_incoming(p, c=32),
        lambda p: triangle_attention(p, num_heads=4, c=16),
        lambda p: transition(p, n=2),
    )
    pair = z
    for sublayer in pair_sublayers:
        pair = pair + sublayer(pair)

    # Single track: attention is biased by the updated pair array.
    single = s + single_attention_pair_bias(s, pair, num_heads=4, c=16)
    single = single + transition(single, n=4)

    return single, pair

def pairformer_module(s, z, num_blocks=4):
    """Run ``num_blocks`` Pairformer blocks in sequence, logging norms.

    Prints a header, then one line per block with the norms of the current
    single and pair arrays.

    Args:
        s: Single array passed to the first block.
        z: Pair array passed to the first block.
        num_blocks: Number of blocks to apply.

    Returns:
        Tuple ``(s, z)`` after the final block.
    """
    print(f"Pairformer Module ({num_blocks} blocks)")
    print("=" * 50)

    for block_number in range(1, num_blocks + 1):
        s, z = pairformer_block(s, z)
        single_mag = np.linalg.norm(s)
        pair_mag = np.linalg.norm(z)
        print(f"  Block {block_number}: s_norm={single_mag:.2f}, z_norm={pair_mag:.2f}")

    return s, z

In [None]:
# Smoke test: run a two-block Pairformer on small random inputs and report
# the output shapes and whether every value is finite.
print("Test: Pairformer Module")
print("="*60)

N, c_s, c_z = 24, 64, 32

# s is drawn before z so the sequence of RNG draws stays the same.
s = 0.1 * np.random.randn(N, c_s)
z = 0.1 * np.random.randn(N, N, c_z)

s_out, z_out = pairformer_module(s, z, num_blocks=2)

print(f"\nOutput: s={s_out.shape}, z={z_out.shape}")
all_finite = np.isfinite(s_out).all() and np.isfinite(z_out).all()
print(f"Finite: {all_finite}")

## Key Insights

1. **Triangle Operations**: Encode geometric constraints
2. **Bidirectional Flow**: Single↔Pair information exchange
3. **SwiGLU Transitions**: Modern activation function
4. **48 Blocks**: Deep iterative refinement