# Generate *de novo* nanobodies to a target protein using Colab and RFantibody
This notebook walks you through the steps to generate novel nanobodies using diffusion.

It uses the RFantibody model from [Rosetta Commons](https://github.com/RosettaCommons/RFantibody), described in detail in [this paper](https://www.biorxiv.org/content/10.1101/2024.03.14.585103v1).

This notebook was adapted from the [original version](https://colab.research.google.com/drive/1-zku6ZcDjK2p4rbyd8CS-ZU7ASeFS026?usp=sharing) by [Andrew Smith](https://www.linkedin.com/in/andrew-smith-8700a2219/).


In [None]:
#@title 1. Verify your runtime has GPU and CUDA availability

# Runs `nvidia-smi` with all of its output discarded. The `&&` / `||` chain prints the
# ✅ message when the command succeeds and the ❌ message when it cannot be run or fails.
!nvidia-smi > /dev/null 2>&1 && echo "✅ GPU Found" || echo "❌ Please connect to a GPU runtime to use this notebook."

In [None]:
#@title Install RFantibody (Structure-Based de novo Antibody Design)
import os
import subprocess
from tqdm.notebook import tqdm

# 1) Install required Python packages
print('Setting up Colab environment for RFantibody...')

# Install the helper packages used by this notebook; pip's output is sent to /dev/null
!pip install tqdm poetry wget py3Dmol --quiet > /dev/null

# Upgrade packaging to avoid the "No module named 'packaging.licenses'" error
!pip install --upgrade packaging

# — CONFIG —
# REPO_DIR: local checkout location; GIT_URL: repository to clone from
REPO_DIR = "/content/RFantibody"
GIT_URL = "https://github.com/RosettaCommons/RFantibody"

# 2) Disable Poetry venv creation so deps install globally (avoids path issues in Colab)
# check=True makes the cell stop here if the `poetry config` call fails.
subprocess.run(["poetry", "config", "virtualenvs.create", "false"], check=True)

# 3) Define setup steps
# Each entry is a (progress-bar label, argv list) pair. All paths in the commands
# are relative, so they resolve against the working directory they are run from.
_DGL_WHEEL_DIR = "include/dgl"
_DGL_WHEEL_NAME = "dgl-2.4.0+cu118-cp310-cp310-manylinux1_x86_64.whl"
# The download URL carries the '+' of the wheel name percent-encoded as '%2B'.
_DGL_WHEEL_URL = (
    "https://data.dgl.ai/wheels/torch-2.3/cu118/"
    + _DGL_WHEEL_NAME.replace("+", "%2B")
)
_DOWNLOAD_DGL_WHEEL = (
    f"mkdir -p {_DGL_WHEEL_DIR} && "
    f"wget {_DGL_WHEEL_URL} "
    f"-O {_DGL_WHEEL_DIR}/{_DGL_WHEEL_NAME}"
)

steps = [
    ("1) Downloading DGL wheel", ["bash", "-c", _DOWNLOAD_DGL_WHEEL]),
    ("2) Installing Python dependencies with Poetry", ["poetry", "install", "--no-root"]),
    ("3) Downloading model weights", ["bash", "include/download_weights.sh"]),
]

# 4) Clone the repository (first run) or update it (re-runs), with a progress bar
# One progress unit for the clone/pull plus one per entry in `steps`.
total = 1 + len(steps)
pbar = tqdm(total=total, desc="RFantibody Setup", unit="step")

# Clone or pull
if not os.path.isdir(REPO_DIR):
    pbar.set_description("Cloning RFantibody repository")
    # Pass REPO_DIR as the clone target: without it git clones into the current
    # working directory, which is not guaranteed to be the parent of REPO_DIR.
    subprocess.run(["git", "clone", GIT_URL, REPO_DIR], check=True)
else:
    pbar.set_description("Updating RFantibody repository")
    subprocess.run(["git", "-C", REPO_DIR, "pull"], check=True)

# Get example files
import wget  # imported here because it is pip-installed earlier in this cell
antibody_folder = "https://raw.githubusercontent.com/amerorchis/AntibodyFiles/refs/heads/main/"
files = ["6m0j_covid_spike.pdb", "4nyl_HLT.pdb"]
local_save_folder = "/content/"

# Download each example file once; a failed download is reported but does not stop setup.
for f in files:
    out = f'{local_save_folder}{f}'
    if not os.path.exists(out):
        try:
            wget.download(f'{antibody_folder}{f}', out=out)
        except Exception as e:
            print(f"Error downloading file {f} with wget: {e}")

# Get USalign binary
out = os.path.join(REPO_DIR, "include", "USalign", "USalign")
# Make sure the destination directory exists before wget tries to write into it.
os.makedirs(os.path.dirname(out), exist_ok=True)
if not os.path.exists(out):
    # Same best-effort handling as the example files above.
    try:
        wget.download(f'{antibody_folder}USalign', out=out)
    except Exception as e:
        print(f"Error downloading file USalign with wget: {e}")
if os.path.exists(out):
    # A freshly downloaded file carries no execute permission; add it so the binary can run.
    os.chmod(out, os.stat(out).st_mode | 0o111)

# Change directory to repo
os.chdir(REPO_DIR)

pbar.update(1)

# Run the rest
for desc, cmd in steps:
    pbar.set_description(desc)
    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        print(e)
    pbar.update(1)

pbar.set_description("Setup complete 🎉")
pbar.close()
!pip install tqdm poetry wget py3Dmol

In [None]:
#@markdown ### 🧬 Protein Target and nanobody Framework
#@markdown ---
#@markdown **Upload Antigen PDB File:** Start the cell to upload your antigen's PDB file. This will be used as the fixed target in the diffusion design.
import re
from google.colab import files


def _upload_pdb(kind):
    """Prompt for one file upload and save it under /content.

    Args:
        kind: word used in the error message ("target" or "framework").

    Returns:
        (uploaded, name, path): the dict returned by ``files.upload()``, the name of
        the first uploaded file, and the path the file was written to.

    Raises:
        ValueError: if nothing was uploaded.
    """
    uploaded = files.upload()
    if not uploaded:
        raise ValueError(f"No {kind} file was uploaded. Please run the cell again and upload a PDB file.")
    name = next(iter(uploaded))
    # The saved path is later interpolated into a shell command, so keep it to
    # characters that are safe there. Names with spaces or parentheses (for example the
    # " (1)" suffix added to a repeated upload) are rewritten with underscores.
    safe_name = re.sub(r'[^A-Za-z0-9._-]', '_', name)
    path = f'/content/{safe_name}'
    with open(path, 'wb') as f:
        f.write(uploaded[name])
    return uploaded, name, path


print("Please upload the target PDB file:")
uploaded_target, target_pdb_name, target_pdb = _upload_pdb("target")
print(f'✅ Target PDB "{target_pdb_name}" uploaded and saved to {target_pdb}\n')


#@markdown **Upload Antibody Framework PDB File:** Start the cell to upload the nanobody framework PDB. The new CDRs will be grafted onto this scaffold.
print("Please upload the antibody framework PDB file:")
uploaded_framework, framework_pdb_name, framework_pdb = _upload_pdb("framework")
print(f'✅ Framework PDB "{framework_pdb_name}" uploaded and saved to {framework_pdb}')


#@markdown ### 🎯 Epitope and CDRs
#@markdown ---
#@markdown **Epitope Residues:** Comma-separated list of antigen residues defining the epitope region (e.g. one of the patches previously identified). Give each residue as chain letter followed by residue number, e.g. T455,T456,T486.
hotspot_res = 'T364,T366,T369,T370,T374,T376,T377,T378,T383,T384,T385,T388'  #@param {type:"string"}
#@markdown **CDR Loop Allowed Length:** Specify the allowed length range for each CDR loop (H1 for CDR1, H2 for CDR2, H3 for CDR3) in “min-max” format, comma-separated.
design_loops = 'H1:8-10,H2:6-6,H3:6-12'  #@param {type:"string"}

#@markdown ### ⚙️ Diffusion and Sampling Settings
#@markdown ---
#@markdown **Number of Designs to Generate:** How many nanobody designs the diffusion sampler should output.
num_designs = 1  #@param {type:"slider", min:1, max:100, step:1}
#@markdown **Final Diffusion Time Step:** The last timestep index at which to apply the denoising network.
final_step = 2  #@param {type:"slider", min:1, max:50, step:1}
#@markdown **Number of Diffusion Time Steps:** Total timesteps in the forward noising chain (higher = finer control).
T = 100  #@param {type:"slider", min:1, max:200, step:1}
#@markdown **Make Runs Deterministic:** If checked, seeds the random number generator so you get repeatable results each run.
deterministic = False  #@param {type:"boolean"}

# Echo every setting (including the uploaded file paths) so they can be checked
# before the design step is run.
print("\n✅ Parameters set successfully:")
print(f"   - Target PDB: {target_pdb}")
print(f"   - Framework PDB: {framework_pdb}")
print(f"   - Hotspot Residues: {hotspot_res}")
print(f"   - CDR Lengths: {design_loops}")
print(f"   - Number of Designs: {num_designs}")
print(f"   - Final Timestep: {final_step}")
print(f"   - Total Timesteps: {T}")
print(f"   - Deterministic: {deterministic}")

In [None]:
#@title 4. Generate Antibodies or Nanobodies with RFantibody
#@markdown ####(generation may take a while)
#@markdown ---
#@markdown #### Settings:
Verbose = True  #@param {type:"boolean"}

import shutil
import textwrap
from pathlib import Path
import os
from datetime import date

if os.getcwd() != '/content/RFantibody':
    os.chdir('/content/RFantibody')

# Move inference file to correct location.
shutil.copyfile("/content/RFantibody/scripts/rfdiffusion_inference.py", "/content/RFantibody/src/rfantibody/rfdiffusion/rfdiffusion_inference.py")
today = date.today().isoformat()

print("Generating antibody designs with RFdiffusion...")

# Set configuation for antibody generation run

PYTHONPATH         = '/content/RFantibody/include/SE3Transformer:/content/RFantibody/src:$PYTHONPATH'
pythonscript       = '/content/RFantibody/src/rfantibody/rfdiffusion/rfdiffusion_inference.py'
config_name        = 'antibody'
ckpt_override_path = '/content/RFantibody/weights/RFdiffusion_Ab.pt'
target_name        = target_pdb.split('/')[-1].split('.')[0]
output_folder      = f'outputs/{target_name}/{today}'
output_prefix      = f'{output_folder}/anobody_Design'

os.makedirs(output_folder, exist_ok=True)

# Interpolate the command with all settings
run_command = textwrap.dedent(f"""\
    export HYDRA_FULL_ERROR=1 && \
    PYTHONPATH={PYTHONPATH} \
    poetry run python {pythonscript} \
    --config-name {config_name} \
    antibody.target_pdb={target_pdb} \
    antibody.framework_pdb={framework_pdb} \
    inference.ckpt_override_path={ckpt_override_path} \
    ppi.hotspot_res=[{hotspot_res}] \
    antibody.design_loops=[{design_loops}] \
    inference.num_designs={num_designs} \
    inference.final_step={final_step} \
    diffuser.T={T} \
    inference.deterministic={deterministic} \
    inference.output_prefix={output_prefix} \
""").strip()

# Add output suppression if verbose is not selected
if not Verbose:
    run_command += " > /dev/null 2>&1"

# Execute the command
!{run_command}

print("\n✅ Run finished. Verifying output files...")

output_path = Path(output_prefix)
output_dir  = output_path.parent
prefix      = output_path.name

# Collect every design written with this prefix, i.e. files named <prefix>_*.pdb
generated = sorted(output_dir.glob(f"{prefix}_*.pdb"))

# Too few files means the run did not produce what was asked for. More files than
# requested is not an error: the folder is keyed by target and date only, so it can
# already hold designs with the same prefix from an earlier run.
if len(generated) < num_designs:
    raise RuntimeError(
        f"❌ Expected {num_designs} designs, but found {len(generated)} files."
    )
elif len(generated) > num_designs:
    print(
        f"⚠️ Found {len(generated)} files for {num_designs} requested designs; "
        f"files already present in {output_dir} are included."
    )
else:
    print("✅ All outputs generated!")

In [None]:
#@title 5. Rank Antibody Candidates by Mean pLDDT {run: "auto"}
import numpy as np

# Mean pLDDT per design, read from the B-factor field (columns 61–66 of a PDB record).
# NOTE(review): assumes the generated files store pLDDT in the B-factor column — confirm.
plddt_scores = {}
for pdb_path in generated:
    with open(pdb_path, 'r') as fh:
        atom_records = [row for row in fh if row.startswith(("ATOM", "HETATM"))]

    vals = []
    for row in atom_records:
        try:
            vals.append(float(row[60:66]))
        except ValueError:
            continue  # field missing or not numeric: leave this record out of the mean

    # A file without any usable value sorts last via -inf
    plddt_scores[pdb_path.name] = float('-inf') if not vals else np.mean(vals)

# Highest mean pLDDT first
ranked = list(plddt_scores.items())
ranked.sort(key=lambda entry: entry[1], reverse=True)

class AntibodyDesign:
    """One generated design together with its score and rank.

    Attributes:
        file: path of the PDB file, built as ``<output_folder>/<file>`` from the
            notebook-level ``output_folder`` variable.
        score: score passed in by the caller (shown as mean pLDDT).
        rank: rank passed in by the caller.
        number: design index parsed from the file name.
    """

    def __init__(self, file, score, rank):
        self.file = f'{output_folder}/{file}'
        self.score = score
        self.rank = rank
        self.number = self._design_number(file)

    @staticmethod
    def _design_number(file):
        """Return the integer that follows the last underscore in ``<prefix>_<n>.pdb``.

        Taking the last ``_``-separated token (rather than a fixed position) keeps this
        working for output prefixes with any number of underscores.

        Raises:
            ValueError: if that token is not an integer.
        """
        stem = file[:-len('.pdb')] if file.endswith('.pdb') else file
        return int(stem.rsplit('_', 1)[-1])

    def __str__(self):
        return f'{self.rank}. Design {self.number} ({self.score:.3f} mean pLDDT)'

# Print the leaderboard and wrap each entry in an AntibodyDesign; ranks start at 1
print("🏆 Antibody candidates ranked by mean pLDDT:")
antibodies = []
for position, (name, score) in enumerate(ranked, start=1):
    print(f"{position}.  {name}: {score:.3f}")
    antibodies.append(AntibodyDesign(name, score, position))

In [None]:
#@title 6. 3D Visualization of Generated Nanobodies {run: "auto"}

import py3Dmol
from pathlib import Path
from ipywidgets import widgets
from IPython.display import display
import math
from google.colab import files

# --- Dropdown to select which design to view ---
# Nothing below can work without the ranked designs from the previous cell.
if 'antibodies' not in locals() or not antibodies:
    print("❌ No design files found. Please ensure the generation step was successful.")
else:
    # Each option shows the design's ranking label and carries its PDB path as the value
    design_options = [(str(design), design.file) for design in antibodies]
    dropdown = widgets.Dropdown(
        options=design_options,
        description='Design:',
        layout={'width': 'max-content'}
    )

    # --- Download PDB Button ---
    download_pdb_button = widgets.Button(
        description="Download PDB",
        button_style='info',
        tooltip='Download the currently selected PDB file'
    )

    def on_download_pdb_button_clicked(b):
        """Button callback: download the PDB file currently chosen in the dropdown.

        Args:
            b: the clicked button (unused).
        """
        chosen_path = dropdown.value
        if not chosen_path:
            print("No file selected to download.")
            return
        print(f"Downloading {Path(chosen_path).name}...")
        files.download(chosen_path)

    download_pdb_button.on_click(on_download_pdb_button_clicked)

    # --- Contact detection shared by the download button and the viewer ---
    # Distance cutoff (Å): an H-chain residue counts as interacting when any of its
    # atoms lies within this distance of any chain-T atom.
    CONTACT_CUTOFF = 7.8

    def _find_interacting_residues(pdb_content, cutoff=CONTACT_CUTOFF):
        """Find chain-H residues that are close to chain T.

        Args:
            pdb_content: text of a PDB file.
            cutoff: maximum atom-atom distance in Å for a contact.

        Returns:
            dict mapping residue number -> residue name for every chain-H residue
            with at least one atom within ``cutoff`` of a chain-T atom.
        """
        h_atoms = []   # (residue number, residue name, (x, y, z)) for chain H
        t_coords = []  # (x, y, z) for chain T
        for record in pdb_content.splitlines():
            if not record.startswith(("ATOM  ", "HETATM")):
                continue
            chain = record[21]
            if chain not in ('H', 'T'):
                continue
            xyz = (float(record[30:38]), float(record[38:46]), float(record[46:54]))
            if chain == 'H':
                h_atoms.append((int(record[22:26]), record[17:20].strip(), xyz))
            else:
                t_coords.append(xyz)

        interact = {}
        for resi, resn, xyz in h_atoms:
            if resi in interact:
                continue  # residue already known to be in contact; skip its other atoms
            if any(math.dist(xyz, target_xyz) <= cutoff for target_xyz in t_coords):
                interact[resi] = resn
        return interact

    def _format_interacting_residues(interact):
        """Render contacts as a comma-separated '<resname><resnum>' list, sorted by number."""
        return ",".join(f"{interact[rnum]}{rnum}" for rnum in sorted(interact))

    # --- Download Interacting Residues Button ---
    download_residues_button = widgets.Button(
        description="Download Interacting Residues",
        button_style='success',
        tooltip='Download interacting residues as text file'
    )

    def on_download_residues_button_clicked(b):
        """Button callback: write the selected design's interacting residues to a text
        file in the working directory and download it.

        Args:
            b: the clicked button (unused).
        """
        selected_file = dropdown.value
        if not selected_file:
            print("No file selected.")
            return

        design_name = Path(selected_file).stem
        interact = _find_interacting_residues(Path(selected_file).read_text())
        if not interact:
            print("No interacting residues found to download.")
            return

        # Create and download the text file
        filename = f"Interacting_residues_{design_name}.txt"
        with open(filename, 'w') as f:
            f.write(_format_interacting_residues(interact))
        files.download(filename)
        print(f"Downloaded {filename}")

    download_residues_button.on_click(on_download_residues_button_clicked)

    # --- Legend widget ---
    legend = widgets.HTML(
        value="""
        <div style="display:flex; gap:1em; align-items:center; margin-top:8px;">
          <div style="width:12px; height:12px; background:steelblue;"></div>
          <span>Antibody (heavy chain)</span>
          <div style="width:12px; height:12px; background:forestgreen;"></div>
          <span>Antibody (light chain)</span>
          <div style="width:12px; height:12px; background:lightgrey; opacity:0.75;"></div>
          <span>Target (antigen)</span>
          <div style="width:12px; height:12px; background:red;"></div>
          <span>Epitope Hotspots</span>
          <div style="width:12px; height:12px; background:yellow;"></div>
          <span>Interacting Residues</span>
        </div>
        """
    )

    # --- Visualization + print function ---
    def visualize_pdb(pdb_file_path):
        """Creates an interactive 3D view and prints interacting residues on H chain.

        Args:
            pdb_file_path: path of the design PDB file to display.
        """
        # load text
        pdb_content = Path(pdb_file_path).read_text()

        # Print interacting residues (same definition as the download button uses)
        interact = _find_interacting_residues(pdb_content)
        if interact:
            print(f"🟡 Interacting residues on chain H (within {CONTACT_CUTOFF}Å of chain T):")
            print(_format_interacting_residues(interact))
        else:
            print(f"⚠️ No H-chain residues found within {CONTACT_CUTOFF}Å of chain T")

        # 3D visualization
        view = py3Dmol.view(width=800, height=600)
        view.addModel(pdb_content, 'pdb')

        # Antibody as colored cartoons
        view.setStyle({'chain':['H']}, {'cartoon': {'color': 'steelblue'}})
        view.setStyle({'chain':['L']}, {'cartoon': {'color': 'forestgreen'}})

        # Target surface
        view.addSurface(py3Dmol.VDW,
                        {'color': 'lightgrey', 'opacity': 0.75},
                        {'chain': 'T'})

        # Hotspot Highlighting
        # NOTE(review): the offset assumes chain-T residue numbers in the design file are
        # the input numbers shifted by the highest chain-H residue number — confirm
        # against the generated PDBs.
        h_chain_residues = {int(line[22:26]) for line in pdb_content.splitlines() if line.startswith("ATOM  ") and line[21] == 'H'}
        h_chain_offset = max(h_chain_residues) if h_chain_residues else 0

        # Group the requested hotspots by chain letter; only entries of the form
        # <letter><digits> are used, and only chains H and T are drawn.
        hotspots_by_chain = {}
        for res_str in hotspot_res.split(','):
            res_str = res_str.strip()
            if res_str and len(res_str) > 1 and res_str[1:].isdigit():
                chain_id = res_str[0]
                res_num = int(res_str[1:])
                hotspots_by_chain.setdefault(chain_id, []).append(res_num)

        # Select by chain as well as residue number so that a residue with the same
        # number on another chain is never highlighted by mistake.
        hotspot_style = {'sphere': {'color': 'red', 'radius': 1.5}}
        if 'H' in hotspots_by_chain:
            view.addStyle({'chain': 'H', 'resi': hotspots_by_chain['H']}, hotspot_style)
        if 'T' in hotspots_by_chain:
            view.addStyle({'chain': 'T',
                           'resi': [r + h_chain_offset for r in hotspots_by_chain['T']]},
                          hotspot_style)

        # Highlight interacting residues on chain H: exactly the residues printed above
        if interact:
            view.addStyle({'chain': 'H', 'resi': sorted(interact)},
                          {'stick': {'color': 'yellow'}})

        view.zoomTo()
        view.show()

    # --- Display everything ---
    # Viewer output bound to the dropdown's value through `visualize_pdb`
    output = widgets.interactive_output(visualize_pdb, {'pdb_file_path': dropdown})

    # Layout: dropdown and both download buttons on one row, then the viewer, then the legend
    buttons = widgets.HBox(children=[download_pdb_button, download_residues_button])
    controls = widgets.HBox(children=[dropdown, buttons])
    page = widgets.VBox(children=[controls, output, legend])
    display(page)
