<a target="_blank" href="https://colab.research.google.com/github/adaptyvbio/nipah_ipsae_pipeline/Boltz-IPSAE.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Boltz2 - IPSAE pipeline

This notebook can be used to run the evaluation pipeline that will be used in the [Nipah Binder Competition](https://proteinbase.com/competitions/adaptyv-nipah-competition) from AdaptyvBio.

These scores are calculated automatically for every submission.

In [21]:
#@title Install dependencies
#@markdown A GPU runtime with high-capacity RAM is strongly recommended. Boltz is also patched here to prevent an issue with loading weights.

import os
import subprocess

# Install everything the notebook needs: Python packages, an editable checkout of
# Boltz (patched in place), and the IPSAE pipeline repository. All paths are
# relative to the current working directory so the cell is self-consistent.
print('Installing dependencies... ', end='')
dependencies = "torch torchvision torchaudio numpy hydra-core pytorch-lightning pytorch_lightning "
dependencies += "rdkit dm-tree requests pandas types-requests einops einx fairscale "
dependencies += "mashumaro modelcif wandb click pyyaml biopython scipy numba gemmi py3Dmol "
dependencies += "scikit-learn chembl_structure_pipeline "
dependencies += "cuequivariance_ops_cu12 cuequivariance_ops_torch_cu12 cuequivariance_torch"

# for T4/L4
precision = '32-true'

# Boltz source file patched below; both sed calls target this same relative path.
boltz_main = "boltz/src/boltz/main.py"

install_commands = ["pip install ipywidgets torch torchvision torchaudio"]
# Skip the clone when the checkout already exists so re-running the cell does not fail.
if not os.path.isdir("boltz"):
    install_commands.append("git clone https://github.com/jwohlwend/boltz.git")
# Replace the 'bf16-mixed' precision setting in Boltz with the one chosen above.
install_commands.append(f"sed -i 's/bf16-mixed/{precision}/g' {boltz_main}")
# Append `weights_only=False` to the call that closes on line 1325 of main.py.
# NOTE(review): this patch is tied to a line number, so it only applies while the
# upstream file keeps that layout -- consider pinning the Boltz commit.
install_commands.append(f"sed -i '1325 s/^)/    weights_only=False)/' {boltz_main}")
install_commands.append(f"pip install {dependencies}")
install_commands.append("cd boltz; pip install --no-deps -e .")
if not os.path.isdir("nipah_ipsae_pipeline"):
    install_commands.append("git clone https://github.com/adaptyvbio/nipah_ipsae_pipeline.git")

# Run every step (best effort, as before) but remember the ones that failed
# instead of unconditionally reporting success.
failed_commands = []
for command in install_commands:
    completed = subprocess.run(command, shell=True)
    if completed.returncode != 0:
        failed_commands.append((completed.returncode, command))

if failed_commands:
    print('FAILED.')
    for returncode, command in failed_commands:
        print(f"  exit code {returncode}: {command}")
else:
    print('done.')

Installing dependencies... done.


In [12]:
#@title Enter sequence input
#@markdown Enter a job name (no spaces) and the binder sequence in the boxes below.
# Name of the output folder for this run. It is interpolated unquoted into the
# `boltz predict` shell command, so it must not contain whitespace.
jobname = "mydesign" #@param {type:"string"}
# Amino-acid sequence (one-letter code) of the single-chain binder to evaluate.
sequence = "EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLEWVSYISSSSSYTNYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTASYYCARGLAGVWGIDVWGQGTLVTVSS" #@param {type:"string"}

# Fail early with a clear message rather than later inside the shell command.
if not jobname or any(ch.isspace() for ch in jobname):
    raise ValueError(f"jobname must be non-empty and contain no whitespace, got {jobname!r}")

# Pasted sequences often carry spaces or line breaks; drop them.
sequence = "".join(sequence.split())
if not sequence:
    raise ValueError("sequence must not be empty")

In [28]:
#@title Setup Input
#@markdown Only for single chain binders

import yaml
import tempfile

# Chain B is the user-supplied binder (no MSA given); chain A is the fixed target,
# which comes with a precomputed MSA from the cloned pipeline repository.
chains = [
    {
        'chain_id': 'B',
        'type': 'protein',
        'sequence': sequence,
    },
    {
        'chain_id': 'A',
        'type': 'protein',
        'sequence': 'QNYTRSTDNQAVIKDALQGIQQQIKGLADKIGTEIGPKVSLIDTSSTITIPANIGLLGSKISQSTASINENVNEKCKFTLPPLKIHECNISCPNPLPFREYRPQTEGVSNLVGLPNNICLQKTSNQILKPKLISYTLPVVGQSGTCITDPLLAMDEGYFAYSHLERIGSCSRGVSKQRIIGVGEVLDRGDEVPSLFMTNVWTPPNPNTVYHCSAVYNNEFYYVLCAVSTVGDPILNSTYWSGSLMMTRLAVKPKSNGGGYNQHQLALRSIEKGRYDKVMPYGPSGIKQGDTLYFPAVGFLVRTEFKYNDSNCPITKCQYSKPENCRLSMGIRPNSHYILRSGLLKYNLSDGENPKVVFIEISDQRLSIGSPSKIYDSLGQPVFYQASFSWDTMIKFGDVLTVNPLVVNWRNNTVISRPGQSQCPRFNTCPEICWEGVYNDAFLIDRINWISAGVFLDSNQTAENPVFTVFKDNEILYRAQLASEDTNAQKTITNCFLLKNKIWCISLVEIYDTGDNVIRPKLFAVKIPEQCT',
        'msa': 'nipah_ipsae_pipeline/nipah.a3m',
    },
]

# Build the Boltz input document. Every chain is written as a "protein" entry
# (the 'type' field is not consulted); a chain without an MSA path is marked
# with the literal "empty".
data = {"version": 1, "sequences": []}
for chain in chains:
    data["sequences"].append(
        {
            "protein": {
                "id": chain.get("chain_id"),
                "sequence": chain.get("sequence", ""),
                "msa": chain.get("msa") or "empty",
            }
        }
    )

# delete=False keeps the file on disk after it is closed so the prediction cell
# can read it; the context manager closes the handle even if the dump fails.
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as tmp:
    yaml.dump(data, tmp, default_flow_style=False)
yaml_path = tmp.name


In [29]:
#@title Run Prediction
#@markdown Run Boltz-2 on the input and write the full PAE matrix alongside the prediction
# Run the Boltz command-line tool on the YAML file written by the setup cell.
# IPython expands $yaml_path and $jobname from the notebook's Python variables,
# so results land under ./<jobname>. --write_full_pae presumably produces the
# pae_*.npz file that the IPSAE cell reads afterwards -- confirm against the Boltz CLI docs.
!boltz predict $yaml_path --out_dir ./$jobname  --write_full_pae

Checking input data.
Processing 1 inputs with 1 threads.
  0% 0/1 [00:00<?, ?it/s]Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.
100% 1/1 [00:00<00:00,  5.30it/s]100% 1/1 [00:00<00:00,  5.29it/s]
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
Running structure prediction for 1 input.
2025-11-07 14:37:05.627164: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762526225.873137    5249 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one ha

In [30]:
#@title Output structure
#@markdown Boltz-2 structure prediction, Blue binder, Gray target
def get_latest_folder(directory):
    """Return the run name of the newest sub-folder of ``directory``.

    "Newest" means the largest ``os.path.getctime`` value; on Unix that is the
    time of the last metadata change, not necessarily the creation time.

    Parameters
    ----------
    directory : str
        Folder whose immediate sub-folders are inspected (plain files are ignored).

    Returns
    -------
    str
        Base name of the newest sub-folder, with a leading ``boltz_results_``
        removed when present.

    Raises
    ------
    ValueError
        If ``directory`` contains no sub-folders.
    """
    prefix = "boltz_results_"
    folders = [
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, name))
    ]
    if not folders:
        # max() would raise an opaque ValueError here; say what is actually wrong.
        raise ValueError(f"No sub-folders found in {directory!r}; run the prediction first.")

    latest_name = os.path.basename(max(folders, key=os.path.getctime))
    # Strip the marker only as a prefix so a run name that happens to contain
    # the same text further along is left intact.
    if latest_name.startswith(prefix):
        return latest_name[len(prefix):]
    return latest_name

# Name of the most recent Boltz run inside this job's output folder.
latest_boltz_run = get_latest_folder(f'./{jobname}')

import py3Dmol

# Resolve the prediction folder from the same ./<jobname> location the run was
# discovered in, instead of assuming the notebook lives in /content.
prediction_dir = os.path.abspath(
    os.path.join(jobname, f"boltz_results_{latest_boltz_run}", "predictions", latest_boltz_run)
)
with open(os.path.join(prediction_dir, f"{latest_boltz_run}_model_0.cif"), 'r') as cif_file:
    cif_text = cif_file.read()

# Render the complex: chain A in gray, chain B in blue.
view = py3Dmol.view()
view.addModel(cif_text, 'cif')
view.setBackgroundColor('white')
view.setStyle({'chain':'A'}, {'cartoon': {'color':'gray'}})
view.addStyle({'chain':'B'}, {'cartoon': {'color':'blue'}})
view.zoomTo()
view.show()

In [34]:
#@title Calculate IPSAE score of prediction
#@markdown By default uses the most recent prediction in the jobname folder
import os
import pandas as pd



def calculate_ipsae(
    pae_file_path,
    structure_file_path,
    pae_cutoff=15.0,
    dist_cutoff=15.0,
):
    """
    Run the ipSAE script on one prediction and summarise its scores per chain pair.

    The script ``nipah_ipsae_pipeline/ipsae.py`` (relative to the current working
    directory) is executed with the four arguments below. Its results table is
    then read from ``<structure path without extension>_<pae>_<dist>.txt``, where
    the cutoffs are truncated to integers.

    Parameters:
    -----------
    pae_file_path : str
        Path to the PAE file (JSON for AF2/AF3, NPZ for Boltz1)
    structure_file_path : str
        Path to the structure file (PDB for AF2, mmCIF for AF3/Boltz1)
    pae_cutoff : float
        Cutoff value for PAE in score calculations
    dist_cutoff : float
        Cutoff value for distance in score calculations

    Returns:
    --------
    dict
        Maps ``"<Chn1>-<Chn2>"`` (one entry per row whose ``Type`` is ``"max"``)
        to ``{"max": {...}, "min": {...}}``. ``"max"`` holds that row's score
        columns; ``"min"`` holds the column-wise minimum over the non-"max" rows
        with the same ``Chn1``/``Chn2``.

    Raises:
    -------
    subprocess.CalledProcessError
        If the ipSAE script exits with a non-zero status.
    """
    import subprocess
    import sys

    # Pass an argument list (no shell) so paths with spaces or shell
    # metacharacters survive, and fail loudly instead of silently going on to
    # read a stale results file from an earlier run.
    subprocess.run(
        [
            sys.executable,
            "nipah_ipsae_pipeline/ipsae.py",
            str(pae_file_path),
            str(structure_file_path),
            str(pae_cutoff),
            str(dist_cutoff),
        ],
        check=True,
    )

    # Swap only the file extension for the cutoff suffix.
    path_stem, _extension = os.path.splitext(structure_file_path)
    results_path = f"{path_stem}_{int(pae_cutoff)}_{int(dist_cutoff)}.txt"

    print(f"Reading results from {results_path}")

    df = pd.read_csv(results_path)
    # Score columns are selected by position: everything after the first five
    # columns and before the last one.
    score_columns = df.columns[5:-1]
    results = {}

    for _, row in df[df.Type == "max"].iterrows():
        chainpair = f"{row['Chn1']}-{row['Chn2']}"
        same_pair = (
            (df['Chn1'] == row['Chn1'])
            & (df['Chn2'] == row['Chn2'])
            & (df['Type'] != "max")
        )
        results[chainpair] = {
            "max": {col: row[col] for col in score_columns},
            "min": df[same_pair][score_columns].min().to_dict(),
        }

    return results
# Score model 0 of the most recent run. The prediction folder is resolved from
# the current working directory (where ./<jobname> was written) rather than a
# hard-coded /content prefix.
ipsae_prediction_dir = os.path.abspath(
    os.path.join(jobname, f"boltz_results_{latest_boltz_run}", "predictions", latest_boltz_run)
)
calculate_ipsae(
    os.path.join(ipsae_prediction_dir, f"pae_{latest_boltz_run}_model_0.npz"),
    os.path.join(ipsae_prediction_dir, f"{latest_boltz_run}_model_0.cif"),
)

Reading results from /content/mydesign/boltz_results_tmp6a55t696/predictions/tmp6a55t696/tmp6a55t696_model_0_15_15.txt


{'A-B': {'max': {'ipSAE': 0.186852,
   'ipSAE_d0chn': 0.318758,
   'ipSAE_d0dom': 0.280244,
   'ipTM_af': 0.574,
   'ipTM_d0chn': 0.305798,
   'pDockQ': 0.2944,
   'pDockQ2': 0.0265,
   'LIS': 0.0475,
   'n0res': 282,
   'n0chn': 651,
   'n0dom': 556,
   'd0res': 6.18,
   'd0chn': 8.86,
   'd0dom': 8.3,
   'nres1': 438,
   'nres2': 118,
   'dist1': 82,
   'dist2': 49},
  'min': {'ipSAE': 0.083236,
   'ipSAE_d0chn': 0.30719,
   'ipSAE_d0dom': 0.280244,
   'ipTM_af': 0.574,
   'ipTM_d0chn': 0.305798,
   'pDockQ': 0.2944,
   'pDockQ2': 0.0265,
   'LIS': 0.0294,
   'n0res': 118.0,
   'n0chn': 651.0,
   'n0dom': 556.0,
   'd0res': 4.01,
   'd0chn': 8.86,
   'd0dom': 8.3,
   'nres1': 438.0,
   'nres2': 118.0,
   'dist1': 82.0,
   'dist2': 49.0}}}