# Algorithm 29: predictPerResidueLDDT (pLDDT)

pLDDT (predicted Local Distance Difference Test) is AlphaFold2's confidence metric. It predicts how accurate each residue's predicted position is, based on local distance comparisons.

## Algorithm Pseudocode

![predictPerResidueLDDT](../imgs/algorithms/predictPerResidueLDDT_Ca.png)

## Source Code Location
- **File**: `AF2-source-code/model/modules.py`
- **Class**: `PredictedLDDTHead`
- **Lines**: 999-1100

## What is LDDT?

LDDT (Local Distance Difference Test) measures the fraction of local distances that are preserved between predicted and ground truth structures:

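$$\text{LDDT}_i = \frac{1}{|S_i|} \sum_{j \in S_i} \frac{1}{4} \sum_{t \in \{0.5, 1, 2, 4\}} \mathbb{1}\left[\,\left|d^{pred}_{ij} - d^{true}_{ij}\right| < t\,\right]$$

Where:
- $S_i$ = set of residues $j \neq i$ whose CA atom lies within 15 Å of residue $i$'s CA atom in the ground truth structure
- $d^{pred}_{ij}, d^{true}_{ij}$ = predicted and true CA-CA distances between residues $i$ and $j$
- $t$ = distance threshold (0.5, 1, 2, 4 Å)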

pLDDT predicts this value for each residue.

In [None]:
import numpy as np

# Seed NumPy's global RNG so every np.random draw in this notebook is reproducible.
np.random.seed(42)

## NumPy Implementation

In [None]:
def layer_norm(x, axis=-1, eps=1e-5):
    """
    Standardise `x` to zero mean and unit variance along `axis`.

    No learnable gain or bias is applied.

    Args:
        x: Input array
        axis: Axis (or axes) over which the statistics are computed
        eps: Constant added to the variance before the square root, which
            keeps the division finite when the variance is zero

    Returns:
        Array with the same shape as `x`
    """
    reduce_kwargs = {"axis": axis, "keepdims": True}
    centered = x - np.mean(x, **reduce_kwargs)
    scale = np.sqrt(np.var(x, **reduce_kwargs) + eps)
    return centered / scale


def relu(x):
    """Rectified linear unit: element-wise maximum of `x` and zero."""
    floor = 0
    return np.maximum(x, floor)


def softmax(x, axis=-1):
    """
    Softmax along `axis`.

    The maximum along `axis` is subtracted before exponentiating so that the
    largest exponent is exp(0); this avoids overflow for large inputs.

    Args:
        x: Input array of unnormalised scores
        axis: Axis along which the outputs sum to one

    Returns:
        Array with the same shape as `x`
    """
    shifted = x - np.max(x, axis=axis, keepdims=True)
    weights = np.exp(shifted)
    normaliser = np.sum(weights, axis=axis, keepdims=True)
    return weights / normaliser


def predict_lddt(structure_module_output, num_bins=50, num_channels=128):
    """
    Predict per-residue LDDT scores.

    Algorithm 29 from AlphaFold2 supplementary materials.

    Pipeline: layer norm -> (linear + ReLU) x 2 -> linear to bin logits ->
    softmax -> expected bin center on a 0-100 scale. All weights are sampled
    from NumPy's global RNG on every call (biases are zero), so the output
    demonstrates shapes and data flow rather than trained predictions.
    Progress information is printed to stdout.

    Args:
        structure_module_output: Final single representation from structure module [N_res, c_s]
            (any array-like accepted by np.asarray)
        num_bins: Number of LDDT bins (default 50, representing 0-1 in 0.02 increments)
        num_channels: Hidden layer width

    Returns:
        plddt_logits: [N_res, num_bins] logits for each LDDT bin
        plddt_scores: [N_res] predicted LDDT scores (0-100 scale)

    Raises:
        ValueError: If the input is not a 2-D [N_res, c_s] array.
    """
    structure_module_output = np.asarray(structure_module_output)
    if structure_module_output.ndim != 2:
        raise ValueError(
            "structure_module_output must be 2-D [N_res, c_s], got shape "
            f"{structure_module_output.shape}"
        )
    c_s = structure_module_output.shape[-1]

    print("pLDDT Prediction")
    print("=" * 50)
    print(f"Input: {structure_module_output.shape}")

    act = structure_module_output

    # Step 1: Layer normalization
    act = layer_norm(act, axis=-1)

    # NOTE: w1, w2, w3 must be drawn in this order; reordering the draws
    # would change the values obtained for a given seed.

    # Step 2: First linear + ReLU (weights scaled by sqrt(2 / fan_in))
    w1 = np.random.randn(c_s, num_channels) * np.sqrt(2.0 / c_s)
    b1 = np.zeros(num_channels)
    act = relu(act @ w1 + b1)

    print(f"After first layer: {act.shape}")

    # Step 3: Second linear + ReLU (weights scaled by sqrt(2 / fan_in))
    w2 = np.random.randn(num_channels, num_channels) * np.sqrt(2.0 / num_channels)
    b2 = np.zeros(num_channels)
    act = relu(act @ w2 + b2)

    print(f"After second layer: {act.shape}")

    # Step 4: Final linear to bin logits (small weights -> near-uniform softmax)
    w3 = np.random.randn(num_channels, num_bins) * 0.01
    b3 = np.zeros(num_bins)
    logits = act @ w3 + b3

    print(f"Logits: {logits.shape}")

    # Step 5: Convert logits to scores
    # Softmax over bins, then compute expected LDDT
    probs = softmax(logits, axis=-1)

    # Bin centers (0.5/num_bins to (num_bins - 0.5)/num_bins, then scale to 0-100)
    bin_centers = (np.arange(num_bins) + 0.5) / num_bins * 100

    # Expected LDDT per residue
    plddt_scores = np.sum(probs * bin_centers, axis=-1)

    print(f"pLDDT scores: {plddt_scores.shape}")
    # min()/max() raise ValueError on a zero-size array, so an input with no
    # residues must not reach them just to print a diagnostic.
    if plddt_scores.size:
        print(f"Score range: [{plddt_scores.min():.1f}, {plddt_scores.max():.1f}]")
    else:
        print("Score range: n/a (no residues)")

    return logits, plddt_scores

## Test Example

In [None]:
# Synthetic problem size: residue count and width of the single
# representation handed to the confidence head.
N_res, c_s = 100, 384

# Standard-normal noise stands in for the structure module output; it is
# cast to float32 after sampling.
structure_output = np.random.randn(N_res, c_s).astype(np.float32)

# end="\n\n" emits the same trailing blank line as a separate bare print().
print(f"Input shape: {structure_output.shape}", end="\n\n")

In [None]:
# Run the confidence head on the synthetic single representation.
logits, plddt_scores = predict_lddt(structure_output, num_bins=50, num_channels=128)

# Summarise how the predicted per-residue scores are distributed.
print("\nStatistics:")
print(f"  Mean pLDDT: {np.mean(plddt_scores):.1f}")
print(f"  Std pLDDT: {np.std(plddt_scores):.1f}")

## pLDDT Score Interpretation

AlphaFold2 color-codes structures by pLDDT:

| Score Range | Color | Interpretation |
|-------------|-------|----------------|
| > 90 | Blue | Very high confidence |
| 70-90 | Cyan | Confident |
| 50-70 | Yellow | Low confidence |
| < 50 | Orange | Very low confidence |

In [None]:
# Categorize scores
def categorize_plddt(scores):
    """
    Map pLDDT scores to integer confidence categories.

    Categories (lower bounds are inclusive):
        0: score < 50         (very low)
        1: 50 <= score < 70   (low)
        2: 70 <= score < 90   (confident)
        3: score >= 90        (very high)

    NaN compares false against every bound and therefore lands in category 0.

    Args:
        scores: pLDDT scores on the 0-100 scale; any array-like of any shape
            (ndarray, list, or scalar)

    Returns:
        Integer array of category codes with the same shape as `scores`
    """
    scores = np.asarray(scores, dtype=float)
    lower_bounds = np.array([50.0, 70.0, 90.0])
    # The category code equals the number of lower bounds a score reaches,
    # so one broadcast comparison replaces the four masked assignments.
    return np.sum(scores[..., None] >= lower_bounds, axis=-1, dtype=int)

categories = categorize_plddt(plddt_scores)

# Tally the residues in each confidence band, highest band first.
print("pLDDT Category Distribution:")
print(f"  Very high (>90): {np.count_nonzero(categories == 3)} residues")
print(f"  Confident (70-90): {np.count_nonzero(categories == 2)} residues")
print(f"  Low (50-70): {np.count_nonzero(categories == 1)} residues")
print(f"  Very low (<50): {np.count_nonzero(categories == 0)} residues")

## Compute Actual LDDT (for training)

In [None]:
def compute_lddt(pred_ca, true_ca, mask, cutoff=15.0):
    """
    Compute actual LDDT scores for training supervision.

    For each residue i, the score is the fraction of its neighbours j whose
    pairwise distance is reproduced within each of the thresholds
    0.5, 1, 2 and 4, averaged over the four thresholds. Neighbours are the
    unmasked residues j != i with a ground-truth distance below `cutoff`.
    A residue with no such neighbours (including every masked residue)
    scores 0.

    Args:
        pred_ca: Predicted CA positions [N_res, 3]
        true_ca: Ground truth CA positions [N_res, 3]
        mask: Residue mask [N_res]; a pair counts only if both entries are > 0
        cutoff: Distance cutoff for including pairs (default 15 Å)

    Returns:
        lddt: Per-residue LDDT scores [N_res] on a 0-100 scale

    Raises:
        ValueError: If `pred_ca` and `true_ca` are not 2-D arrays of the same
            shape, or `mask` does not have shape [N_res].
    """
    pred_ca = np.asarray(pred_ca, dtype=float)
    true_ca = np.asarray(true_ca, dtype=float)
    mask = np.asarray(mask)

    if pred_ca.ndim != 2 or pred_ca.shape != true_ca.shape:
        raise ValueError(
            "pred_ca and true_ca must be 2-D arrays of identical shape, got "
            f"{pred_ca.shape} and {true_ca.shape}"
        )
    N_res = pred_ca.shape[0]
    if mask.shape != (N_res,):
        raise ValueError(f"mask must have shape ({N_res},), got {mask.shape}")

    # Compute pairwise distances
    pred_dist = np.sqrt(np.sum((pred_ca[:, None] - pred_ca[None, :]) ** 2, axis=-1))
    true_dist = np.sqrt(np.sum((true_ca[:, None] - true_ca[None, :]) ** 2, axis=-1))

    # Distance difference
    diff = np.abs(pred_dist - true_dist)

    # Include pairs within cutoff (based on true structure)
    include_mask = (true_dist < cutoff) & (mask[:, None] * mask[None, :] > 0)
    np.fill_diagonal(include_mask, False)  # Exclude self

    # LDDT thresholds
    thresholds = np.array([0.5, 1.0, 2.0, 4.0])

    # per_pair[i, j] = fraction of thresholds that the (i, j) distance error
    # stays below; values are multiples of 1/4.
    per_pair = np.mean(diff[..., None] < thresholds, axis=-1)

    # Average per_pair over each residue's included neighbours.
    n_neighbors = include_mask.sum(axis=1)
    preserved_total = np.where(include_mask, per_pair, 0.0).sum(axis=1)

    # `where=` skips residues without neighbours, leaving the 0 from `out`
    # instead of dividing by zero.
    lddt = np.divide(
        preserved_total,
        n_neighbors,
        out=np.zeros(N_res),
        where=n_neighbors > 0,
    )

    return lddt * 100  # Scale to 0-100


# Synthetic check: a random "ground truth" CA cloud and a prediction obtained
# by perturbing it with smaller Gaussian noise. The ground truth is sampled
# first so the random stream is consumed in the same order.
true_ca = 10 * np.random.randn(N_res, 3)
pred_ca = true_ca + 1.5 * np.random.randn(N_res, 3)
mask = np.ones(N_res)  # every residue is treated as resolved

actual_lddt = compute_lddt(pred_ca, true_ca, mask)

print("Actual LDDT scores:")
print(f"  Mean: {np.mean(actual_lddt):.1f}")
print(f"  Range: [{np.min(actual_lddt):.1f}, {np.max(actual_lddt):.1f}]")

## Source Code Reference

```python
# From AF2-source-code/model/modules.py

class PredictedLDDTHead(hk.Module):
  """Head to predict the per-residue LDDT to be used as a confidence measure.

  Jumper et al. (2021) Suppl. Sec. 1.9.6 "Model confidence prediction (pLDDT)"
  Jumper et al. (2021) Suppl. Alg. 29 "predictPerResidueLDDT_Ca"
  """

  def __call__(self, representations, batch, is_training):
    act = representations['structure_module']

    act = hk.LayerNorm(axis=[-1], ...)(act)

    act = common_modules.Linear(self.config.num_channels, initializer='relu')(act)
    act = jax.nn.relu(act)

    act = common_modules.Linear(self.config.num_channels, initializer='relu')(act)
    act = jax.nn.relu(act)

    logits = common_modules.Linear(self.config.num_bins, ...)(act)
    return dict(logits=logits)
```

## Key Insights

1. **Simple Architecture**: Just LayerNorm + 2-layer MLP + linear output.

2. **Binned Output**: Predicts distribution over LDDT bins, not direct values.

3. **Input**: Uses final representation from Structure Module (single representation).

4. **Training Target**: Actual LDDT computed from predicted vs. ground truth CA positions.

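5. **Scale**: The head itself outputs logits over LDDT bins; the reported pLDDT score is the softmax-weighted average of the bin centers, expressed on a 0-100 scale (percentage of preserved distances).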