# Algorithm 6: Pair Weighted Averaging (Boltz)

Aggregates pair information using learned weights.

## Source Code Location
- **File**: `Boltz-Ref-src/boltz-official/src/boltz/model/layers/pair_averaging.py`

In [None]:
import numpy as np
# Seed NumPy's global RNG so the np.random.randn draws made later in this
# notebook (projection weights and test inputs) are reproducible when the
# cells are run in order.
np.random.seed(42)

def layer_norm(x, eps=1e-5):
    """Normalize ``x`` to zero mean and unit variance along its last axis.

    No learnable scale or shift is applied.

    Args:
        x: Array whose last axis is normalized.
        eps: Constant added to the variance before the square root, for
            numerical stability.

    Returns:
        Array with the same shape as ``x``.
    """
    mu = np.mean(x, axis=-1, keepdims=True)
    sigma = np.sqrt(np.var(x, axis=-1, keepdims=True) + eps)
    centered = x - mu
    return centered / sigma

def softmax(x, axis=-1):
    """Compute a numerically stable softmax of ``x`` along ``axis``.

    Args:
        x: Input array of logits.
        axis: Axis along which the outputs sum to one.

    Returns:
        Array with the same shape as ``x``.
    """
    # Subtracting the per-slice maximum keeps np.exp from overflowing.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    numer = np.exp(shifted)
    denom = np.sum(numer, axis=axis, keepdims=True)
    return numer / denom

In [None]:
def pair_weighted_averaging(s, z, num_heads=8, c=32):
    """
    Pair Weighted Averaging.

    Updates single representation using weighted sum of pair features.

    Per head, a weight for each (i, j) is formed from the product of two
    scalar projections of the layer-normalized single representation and
    normalized with a softmax over j. The layer-normalized pair features
    are projected to per-head values, averaged over j with those weights,
    and the concatenated heads are projected back to c_s channels.

    All projection matrices are drawn from NumPy's global RNG
    (np.random.randn) on every call, so the result depends on the current
    RNG state. A short shape summary is printed as a side effect.

    Args:
        s: Single representation [N, c_s]
        z: Pair representation [N, N, c_z]
        num_heads: Number of heads
        c: Head dimension

    Returns:
        Update to single representation [N, c_s]

    Raises:
        ValueError: If s is not 2-D, or if z is not 3-D with both leading
            axes equal to N.
    """
    # Validate shapes up front: without this a mismatched z only fails deep
    # inside an einsum with an opaque message, and axes of size 1 may be
    # broadcast instead of rejected.
    if s.ndim != 2:
        raise ValueError(
            f"s must be 2-D [N, c_s], got shape {tuple(s.shape)}"
        )
    N, c_s = s.shape
    if z.ndim != 3 or tuple(z.shape[:2]) != (N, N):
        raise ValueError(
            f"z must have shape [{N}, {N}, c_z] to match s, "
            f"got shape {tuple(z.shape)}"
        )
    c_z = z.shape[-1]

    print("Pair Weighted Averaging")
    print("=" * 50)
    print(f"Single: [{N}, {c_s}], Pair: [{N}, {N}, {c_z}]")

    s_norm = layer_norm(s)
    z_norm = layer_norm(z)

    # Compute attention weights from single. Each head uses a scalar
    # query/key, so the logit for (i, j) is just q[i, h] * k[j, h].
    W_q = np.random.randn(c_s, num_heads) * (c_s ** -0.5)
    W_k = np.random.randn(c_s, num_heads) * (c_s ** -0.5)

    q = s_norm @ W_q  # [N, H]
    k = s_norm @ W_k  # [N, H]

    weights = np.einsum('ih,jh->ijh', q, k)  # [N, N, H]
    # Normalize over j so each row i holds a distribution over partners.
    weights = softmax(weights, axis=1)

    # Value from pair representation
    W_v = np.random.randn(c_z, num_heads, c) * (c_z ** -0.5)
    v = np.einsum('ijc,chd->ijhd', z_norm, W_v)  # [N, N, H, c]

    # Weighted sum over j, then concatenate the heads
    output = np.einsum('ijh,ijhd->ihd', weights, v)  # [N, H, c]
    output = output.reshape(N, -1)  # [N, H * c]

    # Project back
    W_o = np.random.randn(num_heads * c, c_s) * ((num_heads * c) ** -0.5)
    output = output @ W_o

    print(f"Output: {output.shape}")

    return output

In [None]:
# Smoke test: run the layer once on random inputs and report the output
# shape and whether every entry is finite.
print("Test: Pair Weighted Averaging")
print("=" * 60)

# Sequence length, single-channel width and pair-channel width.
N, c_s, c_z = 32, 128, 64

# Draw s before z so the RNG stream is consumed in a fixed order.
s = np.random.randn(N, c_s)
z = np.random.randn(N, N, c_z)

output = pair_weighted_averaging(s, z, num_heads=8, c=16)

print(f"\nOutput shape: {output.shape}")
print(f"Output finite: {np.isfinite(output).all()}")

## Key Insights

1. **Pair→Single**: Aggregates pair information into the single representation
2. **Learned Weights**: Attention-based aggregation
3. **Multi-head**: Multiple heads for diverse aggregation
4. **Complementary**: Works with single attention for bidirectional flow