# Algorithm 11: Triangle Multiplication (Outgoing Edges)

Triangle Multiplication updates the pair representation by aggregating information along edges that form triangles. The "outgoing" variant updates edge (i,j) by combining information from the edge pairs (i,k) and (j,k) for all k.

## Algorithm Pseudocode

![Triangle Multiplication Outgoing](../imgs/algorithms/TriangleMultiplicationOutgoing.png)

## Source Code Location
- **File**: `AF2-source-code/model/modules.py`
- **Class**: `TriangleMultiplication`
- **Lines**: 1250-1337

## Geometric Intuition

For a protein with residues i, j, k:
- If we know distance(i,k) and distance(j,k) for all k
- We can infer constraints on distance(i,j)

The triangle inequality tells us:
$$|d(i,k) - d(j,k)| \leq d(i,j) \leq d(i,k) + d(j,k)$$

Triangle multiplication learns to propagate such geometric constraints through the pair representation.

In [None]:
import numpy as np

# Seed NumPy's global RNG so the np.random.randn draws used for the demo
# weights and inputs below are reproducible from a fresh run.
np.random.seed(42)

## NumPy Implementation

In [None]:
def layer_norm(x, axis=-1, eps=1e-5):
    """Normalize x to zero mean and unit variance along `axis`.

    No learned scale or offset is applied; `eps` is added to the variance
    before the square root to keep the division well defined.
    """
    mu = np.mean(x, axis=axis, keepdims=True)
    sigma_sq = np.var(x, axis=axis, keepdims=True)
    centered = x - mu
    scale = np.sqrt(sigma_sq + eps)
    return centered / scale


def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), with x clipped to [-500, 500] first."""
    bounded = np.clip(x, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator


def triangle_multiplication_outgoing(pair_act, pair_mask, num_intermediate=128):
    """
    Apply the "outgoing edges" triangle multiplicative update to a pair tensor.

    Algorithm 11 from the AlphaFold2 supplementary materials. The entry for
    pair (i, j) is formed by summing, over every third index k, the
    channel-wise product of a gated projection of (i, k) with a gated
    projection of (j, k) -- the einsum 'ikc,jkc->ijc'.

    Every weight matrix is drawn from np.random.randn (scaled by 0.01) on each
    call and every gate bias is set to 1, so this demonstrates the data flow
    rather than a trained layer. Intermediate shapes are printed along the way.

    Args:
        pair_act: Pair activations, shape [N_res, N_res, c_z]
        pair_mask: Pair mask, shape [N_res, N_res]
        num_intermediate: Channel width of the left/right projections

    Returns:
        Updated pair activations, shape [N_res, N_res, c_z]
    """
    # Unpacking three values also rejects inputs that are not rank 3.
    _, _, c_z = pair_act.shape

    def random_weights(n_in, n_out):
        # Small random matrix standing in for a learned linear layer.
        return np.random.randn(n_in, n_out) * 0.01

    def linear(x, w):
        # Apply w along the channel (last) axis of an [i, j, c] tensor.
        return np.einsum('ijc,cd->ijd', x, w)

    # Give the mask a trailing channel axis so it broadcasts over channels.
    mask_3d = pair_mask[:, :, None]

    # Input normalization; kept under its own name because the output gate
    # at the end is computed from it as well.
    normed = layer_norm(pair_act, axis=-1)

    print(f"After LayerNorm: {normed.shape}")

    # Left branch: masked projection, then sigmoid gate.
    w_left = random_weights(c_z, num_intermediate)
    w_left_gate = random_weights(c_z, num_intermediate)
    b_left_gate = np.ones(num_intermediate)  # bias of 1

    left = mask_3d * linear(normed, w_left)
    left = left * sigmoid(linear(normed, w_left_gate) + b_left_gate)

    print(f"Left projection: {left.shape}")

    # Right branch: same recipe with its own weights.
    w_right = random_weights(c_z, num_intermediate)
    w_right_gate = random_weights(c_z, num_intermediate)
    b_right_gate = np.ones(num_intermediate)

    right = mask_3d * linear(normed, w_right)
    right = right * sigmoid(linear(normed, w_right_gate) + b_right_gate)

    print(f"Right projection: {right.shape}")

    # Triangle step: for each (i, j) sum left[i, k] * right[j, k] over k.
    combined = np.einsum('ikc,jkc->ijc', left, right)

    print(f"After triangle multiplication: {combined.shape}")

    # Normalize the aggregated tensor, then project back to c_z channels.
    combined = layer_norm(combined, axis=-1)
    w_out = random_weights(num_intermediate, c_z)
    projected = linear(combined, w_out)

    # Output gate, computed from the normalized input.
    w_out_gate = random_weights(c_z, c_z)
    b_out_gate = np.ones(c_z)
    out_gate = sigmoid(linear(normed, w_out_gate) + b_out_gate)
    result = projected * out_gate

    print(f"Final output: {result.shape}")

    return result

## Test Example

In [None]:
# Demo dimensions
N_res = 32      # residues
c_z = 128       # channels in the pair representation
c_hidden = 128  # channels in the intermediate projections

# Random pair activations in single precision
pair_act = np.random.randn(N_res, N_res, c_z).astype(np.float32)

# A pair is valid only when both of its residues are valid; the last four
# residues are treated as padding, which zeroes the last four rows and columns.
residue_is_valid = np.arange(N_res) < N_res - 4
pair_mask = np.outer(residue_is_valid, residue_is_valid).astype(np.float32)

print(f"Pair activations shape: {pair_act.shape}")
print(f"Pair mask shape: {pair_mask.shape}")
print(f"Valid pairs: {int(pair_mask.sum())} / {N_res * N_res}")
print()

In [None]:
# Apply the outgoing-edge update to the demo inputs and summarize the result
output = triangle_multiplication_outgoing(pair_act, pair_mask, num_intermediate=c_hidden)

print(f"\nOutput statistics: mean={output.mean():.6f}, std={output.std():.6f}")

## Compare with Incoming Edges

Algorithm 12 uses a different aggregation pattern:
- **Outgoing (Alg 11)**: `'ikc,jkc->ijc'` - aggregate over third node k using edges (i,k) and (j,k)
- **Incoming (Alg 12)**: `'kjc,kic->ijc'` - aggregate over third node k using edges (k,j) and (k,i)

In [None]:
def triangle_multiplication_incoming(pair_act, pair_mask, num_intermediate=128):
    """
    Triangle Multiplication (Incoming Edges).

    Algorithm 12 from AlphaFold2 supplementary materials. The pipeline is the
    same as triangle_multiplication_outgoing (layer norm, gated left/right
    projections, triangle einsum, layer norm, output projection, output
    gate); only the einsum equation differs: 'kjc,kic->ijc'.

    Weights are drawn from np.random.randn (scaled by 0.01) on each call and
    gate biases are set to 1, so this is a demonstration of the data flow,
    not a trained layer.

    Args:
        pair_act: Pair activations, shape [N_res, N_res, c_z]
        pair_mask: Pair mask, shape [N_res, N_res]
        num_intermediate: Intermediate channel dimension

    Returns:
        Updated pair activations, shape [N_res, N_res, c_z]
    """
    _, _, c_z = pair_act.shape
    mask = pair_mask[:, :, None]  # broadcast the mask over channels

    act = layer_norm(pair_act, axis=-1)
    input_act = act  # the output gate below is computed from this

    # Left projection with gating
    left_proj_w = np.random.randn(c_z, num_intermediate) * 0.01
    left_gate_w = np.random.randn(c_z, num_intermediate) * 0.01
    left_gate_b = np.ones(num_intermediate)  # Bias init to 1

    left_proj_act = mask * np.einsum('ijc,cd->ijd', act, left_proj_w)
    left_gate = sigmoid(np.einsum('ijc,cd->ijd', act, left_gate_w) + left_gate_b)
    left_proj_act = left_proj_act * left_gate

    # Right projection with gating
    right_proj_w = np.random.randn(c_z, num_intermediate) * 0.01
    right_gate_w = np.random.randn(c_z, num_intermediate) * 0.01
    right_gate_b = np.ones(num_intermediate)

    right_proj_act = mask * np.einsum('ijc,cd->ijd', act, right_proj_w)
    right_gate = sigmoid(np.einsum('ijc,cd->ijd', act, right_gate_w) + right_gate_b)
    right_proj_act = right_proj_act * right_gate

    # Incoming edges: 'kjc,kic->ijc'
    # For (i,j), aggregate over k: left[k,j] * right[k,i]
    act = np.einsum('kjc,kic->ijc', left_proj_act, right_proj_act)

    # Center layer normalization
    act = layer_norm(act, axis=-1)

    # Output projection back to c_z channels
    output_w = np.random.randn(num_intermediate, c_z) * 0.01
    act = np.einsum('ijc,cd->ijd', act, output_w)

    # Output gating from the normalized input
    output_gate_w = np.random.randn(c_z, c_z) * 0.01
    output_gate_b = np.ones(c_z)
    gate = sigmoid(np.einsum('ijc,cd->ijd', input_act, output_gate_w) + output_gate_b)
    act = act * gate

    return act

# Apply the incoming-edge variant to the same demo inputs and report shape and statistics
output_incoming = triangle_multiplication_incoming(
    pair_act,
    pair_mask,
    c_hidden,
)
print(f"Incoming output shape: {output_incoming.shape}")
print(f"Incoming statistics: mean={output_incoming.mean():.6f}, std={output_incoming.std():.6f}")

## Source Code Reference

```python
# From AF2-source-code/model/modules.py

class TriangleMultiplication(hk.Module):
  """Triangle multiplication layer ("outgoing" or "incoming").

  Jumper et al. (2021) Suppl. Alg. 11 "TriangleMultiplicationOutgoing"
  Jumper et al. (2021) Suppl. Alg. 12 "TriangleMultiplicationIncoming"
  """

  def __call__(self, act, mask, is_training=True):
    c = self.config
    gc = self.global_config

    mask = mask[..., None]
    act = hk.LayerNorm(axis=[-1], create_scale=True, create_offset=True,
                       name='layer_norm_input')(act)
    input_act = act

    # Projections with gating
    left_proj_act = mask * left_projection(act)
    right_proj_act = mask * right_projection(act)
    left_proj_act *= sigmoid(left_gate(act))
    right_proj_act *= sigmoid(right_gate(act))

    # Key difference between algorithms:
    # "Outgoing" edges equation: 'ikc,jkc->ijc'
    # "Incoming" edges equation: 'kjc,kic->ijc'
    act = jnp.einsum(c.equation, left_proj_act, right_proj_act)

    act = hk.LayerNorm(...)(act)
    act = output_projection(act)
    act *= sigmoid(gating_linear(input_act))

    return act
```

## Key Insights

1. **Triangle Constraint Propagation**: Each edge (i,j) is updated based on all triangles it participates in.

2. **Gating**: Sigmoid gates control information flow. Gate biases are initialized to 1, so with small initial weights each gate starts near sigmoid(1) ≈ 0.73.

3. **Two Variants**: Outgoing and incoming provide complementary views of triangle constraints.

4. **Computational Cost**: O(N³ × c) for the einsum operation, where N is the sequence length (number of residues) and c is the intermediate channel dimension.