# Algorithm 12: Diffusion Module (Boltz)

Core diffusion-based structure prediction module.

## Source Code Location
- **File**: `Boltz-Ref-src/boltz-official/src/boltz/model/modules/diffusion.py`
- **Class**: `DiffusionModule`

In [None]:
import numpy as np
# Seed NumPy's global RNG so the random draws made below are repeatable.
np.random.seed(42)

def layer_norm(x, eps=1e-5):
    """Normalize ``x`` over its last axis to zero mean and unit variance.

    No learned scale or shift is applied. ``eps`` is added to the variance
    before the square root so constant rows do not divide by zero.
    """
    reduce_last = {"axis": -1, "keepdims": True}
    centered = x - np.mean(x, **reduce_last)
    spread = np.sqrt(np.var(x, **reduce_last) + eps)
    return centered / spread

In [None]:
def fourier_embedding(t, dim=256, max_period=10000):
    """Sinusoidal embedding for noise level.

    Uses ``dim // 2`` geometrically spaced frequencies, starting at 1 and
    decaying towards ``1 / max_period``, and returns the cosines followed by
    the sines of ``t * freq``.

    Args:
        t: Scalar value to embed.
        dim: Length of the returned embedding.
        max_period: Sets how far the frequencies decay below 1.

    Returns:
        1-D array of length ``dim`` laid out as ``[cos..., sin...]``; when
        ``dim`` is odd a single trailing zero fills the last slot.
    """
    half = dim // 2
    freqs = np.exp(-np.log(max_period) * np.arange(half) / half)
    args = t * freqs
    embedding = np.concatenate([np.cos(args), np.sin(args)])
    if dim % 2:
        # Two halves of dim // 2 leave one slot unfilled for odd dim; pad it
        # so the output length always matches the requested dim.
        embedding = np.concatenate([embedding, np.zeros(1)])
    return embedding

In [None]:
def get_noise_schedule(num_steps=200, sigma_data=16, *, sigma_min=0.002,
                       sigma_max=80.0, rho=7):
    """EDM-style noise schedule.

    Interpolates linearly between ``sigma_max ** (1 / rho)`` and
    ``sigma_min ** (1 / rho)`` and raises the result to the power ``rho``.

    Args:
        num_steps: Number of intervals; the schedule has ``num_steps + 1``
            entries. Must be at least 1.
        sigma_data: Accepted for interface compatibility; it does not enter
            the computation.
        sigma_min: Noise level of the last entry.
        sigma_max: Noise level of the first entry.
        rho: Exponent controlling how the levels are spaced between the two
            end points.

    Returns:
        1-D array of ``num_steps + 1`` noise levels running from
        ``sigma_max`` to ``sigma_min``.

    Raises:
        ValueError: If ``num_steps`` is smaller than 1 (the step fraction
            would otherwise divide by zero).
    """
    if num_steps < 1:
        raise ValueError(f"num_steps must be at least 1, got {num_steps}")

    inv_rho = 1 / rho
    start = sigma_max ** inv_rho
    stop = sigma_min ** inv_rho

    # Fraction of the way through the schedule: 0 at the first entry, 1 at
    # the last.
    fraction = np.arange(num_steps + 1) / num_steps
    sigmas = (start + fraction * (stop - start)) ** rho

    return sigmas

In [None]:
def diffusion_forward(x_0, sigma):
    """Corrupt clean coordinates with Gaussian noise scaled by ``sigma``.

    Draws one standard-normal sample per entry of ``x_0`` from NumPy's
    global RNG.

    Returns:
        Tuple ``(x_t, noise)`` where ``x_t = x_0 + sigma * noise`` and
        ``noise`` is the unscaled sample that was drawn.
    """
    eps = np.random.randn(*x_0.shape)
    return x_0 + sigma * eps, eps

In [None]:
def diffusion_module_forward(x_t, sigma, s, z, sigma_data=16):
    """
    Diffusion Module forward pass (simplified).

    Wraps a toy network in EDM preconditioning. The network uses freshly
    drawn random weights on every call: it embeds the scaled coordinates,
    adds the single representation and a projection of the noise-level
    embedding, layer-normalizes, and projects back to 3-D. Progress is
    printed to stdout.

    Args:
        x_t: Noisy coordinates [N_atoms, 3]
        sigma: Noise level (strictly positive scalar)
        s: Single representation [N, c_s]
        z: Pair representation [N, N, c_z] (not used by this simplified
            version)
        sigma_data: Data standard deviation

    Returns:
        Predicted clean coordinates [N_atoms, 3]

    Raises:
        ValueError: If ``sigma`` is not strictly positive.
    """
    # c_noise takes log(sigma), which is undefined for sigma <= 0 and would
    # feed NaNs into the conditioning.
    if not sigma > 0:
        raise ValueError(f"sigma must be positive, got {sigma}")

    N_atoms = x_t.shape[0]
    c_s = s.shape[-1]

    print("Diffusion Module Forward")
    print("="*50)
    print(f"Atoms: {N_atoms}, sigma: {sigma:.4f}")

    # EDM preconditioning
    c_skip = sigma_data ** 2 / (sigma ** 2 + sigma_data ** 2)
    c_out = sigma * sigma_data / np.sqrt(sigma ** 2 + sigma_data ** 2)
    c_in = 1 / np.sqrt(sigma ** 2 + sigma_data ** 2)
    c_noise = 0.25 * np.log(sigma)

    print(f"  c_skip: {c_skip:.4f}, c_out: {c_out:.4f}")

    # Scale input
    x_scaled = c_in * x_t

    # Noise embedding
    t_emb = fourier_embedding(c_noise, dim=64)

    # Random demo weights. W_x and W_out are drawn first, in that order, so
    # their values for a given seed do not depend on the extra W_t draw.
    W_x = np.random.randn(3, c_s) * (3 ** -0.5)
    W_out = np.random.randn(c_s, 3) * (c_s ** -0.5)
    W_t = np.random.randn(t_emb.shape[0], c_s) * (t_emb.shape[0] ** -0.5)

    # Simplified network: embed -> transform -> predict
    x_emb = x_scaled @ W_x  # [N_atoms, c_s]

    # Add conditioning from single (simplified: use first N tokens) and from
    # the noise level; the projected embedding is broadcast over all rows.
    N = min(N_atoms, s.shape[0])
    h = x_emb[:N] + s[:N] + t_emb @ W_t

    # Simple transformation
    h = layer_norm(h)
    F_theta = h @ W_out

    # Pad if needed: atoms beyond the first N get a zero network output.
    if N < N_atoms:
        F_theta = np.concatenate([F_theta, np.zeros((N_atoms - N, 3))])

    # EDM output
    x_pred = c_skip * x_t + c_out * F_theta

    print(f"  Predicted shape: {x_pred.shape}")
    return x_pred

In [None]:
# Smoke test: run one forward pass of the simplified diffusion module on
# random inputs and check that the output contains only finite values.
print("Test: Diffusion Module")
print("="*60)

# Problem sizes: more atoms than tokens, so the module's zero-padding branch
# is exercised.
N_atoms = 64
N = 32
c_s = 128
c_z = 64

# Ground truth coordinates: standard-normal samples scaled by 10.
x_0 = np.random.randn(N_atoms, 3) * 10

# Add noise at a single fixed noise level.
sigma = 5.0
x_t, noise = diffusion_forward(x_0, sigma)

# Random stand-ins for the single and pair representations.
s = np.random.randn(N, c_s)
z = np.random.randn(N, N, c_z)

# Predict
x_pred = diffusion_module_forward(x_t, sigma, s, z)

# The first value is the root-mean-square difference between the noisy and
# clean coordinates, averaged over every coordinate entry.
print(f"\nGround truth RMSD from noisy: {np.sqrt(np.mean((x_t - x_0)**2)):.2f}")
print(f"Prediction finite: {np.isfinite(x_pred).all()}")

## Key Insights

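1. **EDM Framework**: Uses the preconditioning scheme from Elucidating Diffusion Models (EDM), with the coefficients `c_skip`, `c_out`, `c_in` and `c_noise` all computed from `sigma` and `sigma_data`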
2. **Sigma Conditioning**: Network conditioned on noise level
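3. **Skip Connection**: The `c_skip * x_t` term passes the noisy input straight to the output, so the network only has to supply the `c_out`-scaled correction; `c_skip` approaches 1 as `sigma` goes to 0 and shrinks towards 0 as `sigma` grows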
4. **Atom-Token Bridge**: Connects atom and token representations